[postgis-tickets] r16149 - Proof of content for reading table extent from

Paul Ramsey pramsey at cleverelephant.ca
Tue Dec 12 14:53:48 PST 2017


Author: pramsey
Date: 2017-12-12 14:53:48 -0800 (Tue, 12 Dec 2017)
New Revision: 16149

Modified:
   trunk/libpgcommon/gserialized_gist.c
   trunk/libpgcommon/gserialized_gist.h
   trunk/postgis/gserialized_estimate.c
   trunk/postgis/gserialized_gist_2d.c
   trunk/postgis/gserialized_gist_nd.c
   trunk/postgis/lwgeom_transform.c
   trunk/postgis/postgis.sql.in
   trunk/regress/estimatedextent.sql
Log:
Proof of content for reading table extent from
the top of the index. Can be called with
_postgis_index_extent(tbl regclass, col text)
and returns box2d.

Currently not added to the st_estimatedextent()
code, because it has issues with indexes that
have EMPTY added to them. This is a symptom 
of #2487. While it seems impossible to every
allow index-assisted searches for EMPTY 
geometry, it will be necessary to create
invalid keys for them (NaN, NaN, NaN, Nan)
and then handle those keys so they don't 
pollute the index with strange bounds.

References #2256


Modified: trunk/libpgcommon/gserialized_gist.c
===================================================================
--- trunk/libpgcommon/gserialized_gist.c	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/libpgcommon/gserialized_gist.c	2017-12-12 22:53:48 UTC (rev 16149)
@@ -109,7 +109,7 @@
 }
 
 /* Convert a gidx to a gbox */
-static void gbox_from_gidx(GIDX *a, GBOX *gbox, int flags)
+void gbox_from_gidx(GIDX *a, GBOX *gbox, int flags)
 {
 	gbox->xmin = (double)GIDX_GET_MIN(a,0);
 	gbox->xmax = (double)GIDX_GET_MAX(a,0);

Modified: trunk/libpgcommon/gserialized_gist.h
===================================================================
--- trunk/libpgcommon/gserialized_gist.h	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/libpgcommon/gserialized_gist.h	2017-12-12 22:53:48 UTC (rev 16149)
@@ -87,6 +87,24 @@
 /* Returns the size required to store a GIDX of requested dimension */
 #define GIDX_SIZE(dimensions) (sizeof(int32) + 2*(dimensions)*sizeof(float))
 
+/* Allocate a copy of the box */
+BOX2DF* box2df_copy(BOX2DF *b);
+
+/* Grow the first argument to contain the second */
+void box2df_merge(BOX2DF *b_union, BOX2DF *b_new);
+
+/* Allocate a copy of the box */
+GIDX* gidx_copy(GIDX *b);
+
+/* Grow the first argument to contain the second */
+void gidx_merge(GIDX **b_union, GIDX *b_new);
+
+/* Fill in a gbox from a GIDX */
+void gbox_from_gidx(GIDX *a, GBOX *gbox, int flags);
+
+/* Fill in a gbox from a BOX2DF */
+int box2df_to_gbox_p(BOX2DF *a, GBOX *box);
+
 /*********************************************************************************
 ** GSERIALIZED support functions.
 **

Modified: trunk/postgis/gserialized_estimate.c
===================================================================
--- trunk/postgis/gserialized_estimate.c	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/postgis/gserialized_estimate.c	2017-12-12 22:53:48 UTC (rev 16149)
@@ -60,6 +60,34 @@
 **********************************************************************/
 
 #include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gin.h"
+#include "access/gist.h"
+#include "access/gist_private.h"
+#include "access/gistscan.h"
+#include "utils/datum.h"
+#include "access/heapam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "miscadmin.h"
+#include "storage/lmgr.h"
+#include "catalog/namespace.h"
+#include "catalog/indexing.h"
+#if PG_VERSION_NUM >= 100000
+#include "utils/regproc.h"
+#include "utils/varlena.h"
+#endif
+#include "utils/tqual.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/snapmgr.h"
+#include "utils/fmgroids.h"
+#include "funcapi.h"
+#include "access/heapam.h"
+#include "catalog/pg_type.h"
+#include "access/relscan.h"
+
 #include "executor/spi.h"
 #include "fmgr.h"
 #include "commands/vacuum.h"
@@ -93,6 +121,10 @@
 #include <errno.h>
 #include <ctype.h>
 
+
+/************************************************************************/
+
+
 /* Fall back to older finite() if necessary */
 #ifndef HAVE_ISFINITE
 # ifdef HAVE_GNU_ISFINITE
@@ -112,14 +144,20 @@
 Datum gserialized_gist_sel_nd(PG_FUNCTION_ARGS);
 Datum gserialized_analyze_nd(PG_FUNCTION_ARGS);
 Datum gserialized_estimated_extent(PG_FUNCTION_ARGS);
+Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS);
 Datum _postgis_gserialized_sel(PG_FUNCTION_ARGS);
 Datum _postgis_gserialized_joinsel(PG_FUNCTION_ARGS);
 Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
 
+/* Local prototypes */
+static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
+static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
+
+
 /* Old Prototype */
 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
 
-/**
+/*
 * Assign a number to the n-dimensional statistics kind
 *
 * tgl suggested:
@@ -135,6 +173,13 @@
 #define STATISTIC_SLOT_2D 1
 
 /*
+* To look-up the spatial index associated with a table we
+* need to find GIST indexes using our spatial keys.
+*/
+#define INDEX_KEY_ND "gidx"
+#define INDEX_KEY_2D "box2df"
+
+/*
 * The SD factor restricts the side of the statistics histogram
 * based on the standard deviation of the extent of the data.
 * SDFACTOR is the number of standard deviations from the mean
@@ -262,10 +307,11 @@
 text_p_get_mode(const text *txt)
 {
 	int mode = 2;
-	char *modestr = text2cstring(txt);
+	if (VARSIZE(txt) - VARHDRSZ <= 0)
+		return mode;
+	char *modestr = (char*)VARDATA(txt);
 	if ( modestr[0] == 'N' )
 		mode = 0;
-	pfree(modestr);
 	return mode;
 }
 
@@ -872,7 +918,7 @@
 	free_attstatsslot(&sslot);
 #endif
 
-    return nd_stats;
+	return nd_stats;
 }
 
 /**
@@ -1873,7 +1919,7 @@
 	double max[ND_DIMS];
 	double total_count = 0.0;
 	int ndims_max = Max(nd_stats->ndims, gbox_ndims(box));
-//	int ndims_min = Min(nd_stats->ndims, gbox_ndims(box));
+	/* int ndims_min = Min(nd_stats->ndims, gbox_ndims(box)); */
 
 	/* Calculate the overlap of the box on the histogram */
 	if ( ! nd_stats )
@@ -2239,10 +2285,11 @@
 	char *tbl = NULL;
 	text *col = NULL;
 	char *nsp_tbl = NULL;
-	Oid tbl_oid;
+	Oid tbl_oid, idx_oid;
 	ND_STATS *nd_stats;
-	GBOX *gbox;
+	GBOX *gbox = NULL;
 	bool only_parent = false;
+	int key_type;
 
 	if ( PG_NARGS() == 4 )
 	{
@@ -2280,26 +2327,38 @@
 		PG_RETURN_NULL();
 	}
 
-	/* Estimated extent only returns 2D bounds, so use mode 2 */
-	nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
+#if 0
+	/* Read the extent from the head of the spatial index, if there is one */
+	idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
+	if (!idx_oid)
+		elog(DEBUG2, "index for \"%s.%s\" does not exist", tbl, text2cstring(col));
+	gbox = spatial_index_read_extent(idx_oid, key_type);
+#endif
 
-	/* Error out on no stats */
-	if ( ! nd_stats ) {
-		elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text2cstring(col));
-		PG_RETURN_NULL();
+	/* Fall back to reading the stats, if no index answer */
+	if (!gbox)
+	{
+		/* Estimated extent only returns 2D bounds, so use mode 2 */
+		nd_stats = pg_get_nd_stats_by_name(tbl_oid, col, 2, only_parent);
+
+		/* Error out on no stats */
+		if ( ! nd_stats ) {
+			elog(WARNING, "stats for \"%s.%s\" do not exist", tbl, text2cstring(col));
+			PG_RETURN_NULL();
+		}
+
+		/* Construct the box */
+		gbox = palloc(sizeof(GBOX));
+		FLAGS_SET_GEODETIC(gbox->flags, 0);
+		FLAGS_SET_Z(gbox->flags, 0);
+		FLAGS_SET_M(gbox->flags, 0);
+		gbox->xmin = nd_stats->extent.min[0];
+		gbox->xmax = nd_stats->extent.max[0];
+		gbox->ymin = nd_stats->extent.min[1];
+		gbox->ymax = nd_stats->extent.max[1];
+		pfree(nd_stats);
 	}
 
-	/* Construct the box */
-	gbox = palloc(sizeof(GBOX));
-	FLAGS_SET_GEODETIC(gbox->flags, 0);
-	FLAGS_SET_Z(gbox->flags, 0);
-	FLAGS_SET_M(gbox->flags, 0);
-	gbox->xmin = nd_stats->extent.min[0];
-	gbox->xmax = nd_stats->extent.max[0];
-	gbox->ymin = nd_stats->extent.min[1];
-	gbox->ymax = nd_stats->extent.max[1];
-
-	pfree(nd_stats);
 	PG_RETURN_POINTER(gbox);
 }
 
@@ -2331,3 +2390,178 @@
 	elog(ERROR, "geometry_estimated_extent() called with wrong number of arguments");
 	PG_RETURN_NULL();
 }
+
+/************************************************************************/
+
+static Oid
+typname_to_oid(const char *typname)
+{
+    Oid typoid = TypenameGetTypid(typname);
+    if (OidIsValid(typoid) && get_typisdefined(typoid))
+		return typoid;
+	else
+		return InvalidOid;
+}
+
+static Oid
+table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
+{
+	Relation tbl_rel;
+	ListCell *lc;
+	List *idx_list;
+	Oid result = InvalidOid;
+	char *colname = text2cstring(col);
+
+	/* Lookup our spatial index key types */
+	Oid b2d_oid = typname_to_oid(INDEX_KEY_2D);
+	Oid gdx_oid = typname_to_oid(INDEX_KEY_ND);
+
+	if (!(b2d_oid && gdx_oid))
+		return InvalidOid;
+
+	tbl_rel = RelationIdGetRelation(tbl_oid);
+	idx_list = RelationGetIndexList(tbl_rel);
+	RelationClose(tbl_rel);
+
+	/* For each index associated with this table... */
+	foreach(lc, idx_list)
+	{
+		Form_pg_class idx_form;
+		HeapTuple idx_tup;
+		int idx_relam;
+		Oid idx_oid = lfirst_oid(lc);
+
+		idx_tup = SearchSysCache1(RELOID, ObjectIdGetDatum(idx_oid));
+		if (!HeapTupleIsValid(idx_tup))
+			elog(ERROR, "%s: unable to lookup index %u in syscache", __func__, idx_oid);
+		idx_form = (Form_pg_class) GETSTRUCT(idx_tup);
+		idx_relam = idx_form->relam;
+		ReleaseSysCache(idx_tup);
+
+		/* Does the index use a GIST access method? */
+		if (idx_relam == GIST_AM_OID)
+		{
+			Form_pg_attribute att;
+			Oid atttypid;
+			/* Is the index on the column name we are looking for? */
+			HeapTuple att_tup = SearchSysCache2(ATTNAME,
+			                                    ObjectIdGetDatum(idx_oid),
+			                                    PointerGetDatum(colname));
+			if (!HeapTupleIsValid(att_tup))
+				continue;
+
+			att = (Form_pg_attribute) GETSTRUCT(att_tup);
+			atttypid = att->atttypid;
+			ReleaseSysCache(att_tup);
+
+			/* Is the column actually spatial? */
+			if (b2d_oid == atttypid || gdx_oid == atttypid)
+			{
+				/* Save result, clean up, and break out */
+				result = idx_oid;
+				if (key_type)
+					*key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
+				break;
+			}
+		}
+	}
+	return result;
+}
+
+static GBOX *
+spatial_index_read_extent(Oid idx_oid, int key_type)
+{
+	BOX2DF *bounds_2df = NULL;
+	GIDX *bounds_gidx = NULL;
+	GBOX *gbox = NULL;
+
+	if (!idx_oid)
+		return NULL;
+
+	Relation idx_rel = index_open(idx_oid, AccessExclusiveLock);
+	Buffer buffer = ReadBuffer(idx_rel, GIST_ROOT_BLKNO);
+	Page page = (Page) BufferGetPage(buffer);
+	OffsetNumber offset = FirstOffsetNumber;
+	unsigned long offset_max = PageGetMaxOffsetNumber(page);
+	while (offset <= offset_max)
+	{
+		ItemId iid = PageGetItemId(page, offset);
+		if (!iid)
+		{
+			ReleaseBuffer(buffer);
+			index_close(idx_rel, AccessExclusiveLock);
+			return NULL;
+		}
+		IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);
+		if (!GistTupleIsInvalid(ituple))
+		{
+			bool isnull;
+			Datum idx_attr = index_getattr(ituple, 1, idx_rel->rd_att, &isnull);
+			if (!isnull)
+			{
+				if (key_type == STATISTIC_SLOT_2D)
+				{
+					BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
+					if (bounds_2df)
+						box2df_merge(bounds_2df, b);
+					else
+						bounds_2df = box2df_copy(b);
+				}
+				else
+				{
+					GIDX *b = (GIDX*)DatumGetPointer(idx_attr);
+					if (bounds_gidx)
+						gidx_merge(&bounds_gidx, b);
+					else
+						bounds_gidx = gidx_copy(b);
+				}
+			}
+		}
+		offset++;
+	}
+
+	ReleaseBuffer(buffer);
+	index_close(idx_rel, AccessExclusiveLock);
+
+	if (key_type == STATISTIC_SLOT_2D && bounds_2df)
+	{
+		gbox = gbox_new(0);
+		box2df_to_gbox_p(bounds_2df, gbox);
+	}
+	else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
+	{
+		gbox = gbox_new(0);
+		gbox_from_gidx(bounds_gidx, gbox, 0);
+	}
+	else
+		return NULL;
+
+	return gbox;
+}
+
+/*
+CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
+	RETURNS box2d
+	AS '$libdir/postgis-2.5','_postgis_gserialized_index_extent'
+	LANGUAGE 'c' STABLE STRICT;
+*/
+
+PG_FUNCTION_INFO_V1(_postgis_gserialized_index_extent);
+Datum _postgis_gserialized_index_extent(PG_FUNCTION_ARGS)
+{
+	GBOX *gbox = NULL;
+	int key_type;
+	Oid tbl_oid = PG_GETARG_DATUM(0);
+	text *col = PG_GETARG_TEXT_P(1);
+
+	Oid idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
+	if (!idx_oid)
+		PG_RETURN_NULL();
+
+	gbox = spatial_index_read_extent(idx_oid, key_type);
+	if (!gbox)
+		PG_RETURN_NULL();
+	else
+		PG_RETURN_POINTER(gbox);
+}
+

Modified: trunk/postgis/gserialized_gist_2d.c
===================================================================
--- trunk/postgis/gserialized_gist_2d.c	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/postgis/gserialized_gist_2d.c	2017-12-12 22:53:48 UTC (rev 16149)
@@ -145,7 +145,7 @@
 }
 
 /* Allocate a new copy of BOX2DF */
-static BOX2DF* box2df_copy(BOX2DF *b)
+BOX2DF* box2df_copy(BOX2DF *b)
 {
 	BOX2DF *c = (BOX2DF*)palloc(sizeof(BOX2DF));
 	memcpy((void*)c, (void*)b, sizeof(BOX2DF));
@@ -157,7 +157,7 @@
 
 /* Enlarge b_union to contain b_new. If b_new contains more
    dimensions than b_union, expand b_union to contain those dimensions. */
-static void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
+void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
 {
 
 	POSTGIS_DEBUGF(5, "merging %s with %s", box2df_to_string(b_union), box2df_to_string(b_new));
@@ -289,6 +289,16 @@
 	return LW_SUCCESS;
 }
 
+int box2df_to_gbox_p(BOX2DF *a, GBOX *box)
+{
+	memset(box, 0, sizeof(GBOX));
+	box->xmin = a->xmin;
+	box->xmax = a->xmax;
+	box->ymin = a->ymin;
+	box->ymax = a->ymax;
+	return LW_SUCCESS;
+}
+
 /***********************************************************************
 ** BOX3DF tests for 2D index operators.
 */

Modified: trunk/postgis/gserialized_gist_nd.c
===================================================================
--- trunk/postgis/gserialized_gist_nd.c	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/postgis/gserialized_gist_nd.c	2017-12-12 22:53:48 UTC (rev 16149)
@@ -123,7 +123,7 @@
 
 
 /* Allocate a new copy of GIDX */
-static GIDX* gidx_copy(GIDX *b)
+GIDX* gidx_copy(GIDX *b)
 {
 	GIDX *c = (GIDX*)palloc(VARSIZE(b));
 	POSTGIS_DEBUGF(5, "copied gidx (%p) to gidx (%p)", b, c);
@@ -170,7 +170,7 @@
 
 /* Enlarge b_union to contain b_new. If b_new contains more
    dimensions than b_union, expand b_union to contain those dimensions. */
-static void gidx_merge(GIDX **b_union, GIDX *b_new)
+void gidx_merge(GIDX **b_union, GIDX *b_new)
 {
 	int i, dims_union, dims_new;
 	Assert(b_union);

Modified: trunk/postgis/lwgeom_transform.c
===================================================================
--- trunk/postgis/lwgeom_transform.c	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/postgis/lwgeom_transform.c	2017-12-12 22:53:48 UTC (rev 16149)
@@ -62,12 +62,6 @@
 	geom = PG_GETARG_GSERIALIZED_P_COPY(0);
 	input_srid = gserialized_get_srid(geom);
 
-	/* Moved this back down after input_srid set otherwise FreeBSD 11 (bessie goes nuts)
-	  Refer to https://trac.osgeo.org/postgis/ticket/3940 */
-	/* Input SRID and output SRID are equal, noop */
-	if ( input_srid == output_srid )
-		PG_RETURN_POINTER(PG_GETARG_DATUM(0));
-
 	if ( input_srid == SRID_UNKNOWN )
 	{
 		PG_FREE_IF_COPY(geom, 0);
@@ -75,8 +69,10 @@
 		PG_RETURN_NULL();
 	}
 
+	/* Input SRID and output SRID are equal, noop */
+	if ( input_srid == output_srid )
+		PG_RETURN_POINTER(PG_GETARG_DATUM(0));
 
-
 	if ( GetProjectionsUsingFCInfo(fcinfo, input_srid, output_srid, &input_pj, &output_pj) == LW_FAILURE )
 	{
 		PG_FREE_IF_COPY(geom, 0);

Modified: trunk/postgis/postgis.sql.in
===================================================================
--- trunk/postgis/postgis.sql.in	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/postgis/postgis.sql.in	2017-12-12 22:53:48 UTC (rev 16149)
@@ -512,6 +512,14 @@
 	AS 'MODULE_PATHNAME', '_postgis_gserialized_stats'
 	LANGUAGE 'c' STRICT _PARALLEL;
 
+-- Availability: 2.5.0
+-- Given a table and a column, returns the extent of all boxes in the
+-- first page of the index (the head of the index)
+CREATE OR REPLACE FUNCTION _postgis_index_extent(tbl regclass, col text)
+	RETURNS box2d
+	AS 'MODULE_PATHNAME','_postgis_gserialized_index_extent'
+	LANGUAGE 'c' STABLE STRICT;
+
 -- Availability: 2.1.0
 CREATE OR REPLACE FUNCTION gserialized_gist_sel_2d (internal, oid, internal, int4)
 	RETURNS float8

Modified: trunk/regress/estimatedextent.sql
===================================================================
--- trunk/regress/estimatedextent.sql	2017-12-12 22:16:55 UTC (rev 16148)
+++ trunk/regress/estimatedextent.sql	2017-12-12 22:53:48 UTC (rev 16149)
@@ -3,7 +3,6 @@
 select '#877.1', ST_EstimatedExtent('t','g');
 analyze t;
 select '#877.2', ST_EstimatedExtent('public', 't','g');
-SET client_min_messages TO DEBUG;
 select '#877.2.deprecated', ST_Estimated_Extent('public', 't','g');
 SET client_min_messages TO NOTICE;
 insert into t(g) values ('LINESTRING(-10 -50, 20 30)');
@@ -188,3 +187,18 @@
 
 drop table p cascade;
 
+
+-- create table test (id serial primary key, geom1 geometry, geom2 geometry);
+-- create index test_x1 on test using gist (geom1);
+-- create index test_x2 on test using gist (geom2);
+-- select _postgis_gserialized_index_extent('test', 'geom1');
+-- select _postgis_gserialized_index_extent('test', 'geom2');
+-- insert into test (geom1, geom2) select NULL, NULL;
+-- insert into test (geom1, geom2) select 'POINT EMPTY', 'LINESTRING EMPTY';
+-- insert into test (geom1, geom2) select 'POINT EMPTY', 'LINESTRING EMPTY';
+-- select _postgis_gserialized_index_extent('test', 'geom1');
+-- select _postgis_gserialized_index_extent('test', 'geom2');
+-- insert into test (geom1, geom2) select st_makepoint(s, s), st_makepoint(2*s, 2*s) from generate_series(-100,100) s;
+-- select _postgis_gserialized_index_extent('test', 'geom1');
+-- select _postgis_gserialized_index_extent('test', 'geom2');
+



More information about the postgis-tickets mailing list