[postgis-tickets] r16151 - _postgis_index_extent() for extent from index

Paul Ramsey pramsey at cleverelephant.ca
Wed Dec 13 12:51:36 PST 2017


Author: pramsey
Date: 2017-12-13 12:51:36 -0800 (Wed, 13 Dec 2017)
New Revision: 16151

Modified:
   trunk/NEWS
   trunk/libpgcommon/gserialized_gist.h
   trunk/postgis/gserialized_estimate.c
   trunk/postgis/gserialized_gist_2d.c
   trunk/postgis/gserialized_gist_nd.c
   trunk/regress/estimatedextent.sql
   trunk/regress/estimatedextent_expected
Log:
_postgis_index_extent() for extent from index 


Modified: trunk/NEWS
===================================================================
--- trunk/NEWS	2017-12-13 14:03:52 UTC (rev 16150)
+++ trunk/NEWS	2017-12-13 20:51:36 UTC (rev 16151)
@@ -6,6 +6,7 @@
   - #3564, ST_LineInterpolatePoints (Dan Baston)
   - #3896, PostGIS_Extensions_Upgrade()
   - #3913, Upgrade when creating extension from unpackaged (Sandro Santilli)
+  - #2256, _postgis_index_extent() for extent from index (Paul Ramsey)
 
 * Breaking Changes *
   - #3885, version number removed from address_standardize lib file

Modified: trunk/libpgcommon/gserialized_gist.h
===================================================================
--- trunk/libpgcommon/gserialized_gist.h	2017-12-13 14:03:52 UTC (rev 16150)
+++ trunk/libpgcommon/gserialized_gist.h	2017-12-13 20:51:36 UTC (rev 16151)
@@ -66,6 +66,8 @@
 /* Increase the size of a GIDX */
 void gidx_expand(GIDX *a, float d);
 
+/* Note empty GIDX */
+bool gidx_is_unknown(const GIDX *a);
 
 /* Generate human readable form for GIDX. */
 char* gidx_to_string(GIDX *a) ;
@@ -99,6 +101,9 @@
 /* Grow the first argument to contain the second */
 void gidx_merge(GIDX **b_union, GIDX *b_new);
 
+/* Note empty BOX2DF */
+bool box2df_is_empty(const BOX2DF *a);
+
 /* Fill in a gbox from a GIDX */
 void gbox_from_gidx(GIDX *a, GBOX *gbox, int flags);
 

Modified: trunk/postgis/gserialized_estimate.c
===================================================================
--- trunk/postgis/gserialized_estimate.c	2017-12-13 14:03:52 UTC (rev 16150)
+++ trunk/postgis/gserialized_estimate.c	2017-12-13 20:51:36 UTC (rev 16151)
@@ -2327,7 +2327,7 @@
 		PG_RETURN_NULL();
 	}
 
-#if 0
+#if 1
 	/* Read the extent from the head of the spatial index, if there is one */
 	idx_oid = table_get_spatial_index(tbl_oid, col, &key_type);
 	if (!idx_oid)
@@ -2525,11 +2525,15 @@
 
 	if (key_type == STATISTIC_SLOT_2D && bounds_2df)
 	{
+		if (box2df_is_empty(bounds_2df))
+			return NULL;
 		gbox = gbox_new(0);
 		box2df_to_gbox_p(bounds_2df, gbox);
 	}
 	else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
 	{
+		if (gidx_is_unknown(bounds_gidx))
+			return NULL;
 		gbox = gbox_new(0);
 		gbox_from_gidx(bounds_gidx, gbox, 0);
 	}

Modified: trunk/postgis/gserialized_gist_2d.c
===================================================================
--- trunk/postgis/gserialized_gist_2d.c	2017-12-13 14:03:52 UTC (rev 16150)
+++ trunk/postgis/gserialized_gist_2d.c	2017-12-13 20:51:36 UTC (rev 16151)
@@ -153,8 +153,33 @@
 	return c;
 }
 
+inline bool box2df_is_empty(const BOX2DF *a)
+{
+	if (isnan(a->xmin))
+		return true;
+	else
+		return false;
+}
 
+static inline void box2df_set_empty(BOX2DF *a)
+{
+	a->xmin = a->xmax = a->ymin = a->ymax = NAN;
+	return;
+}
 
+static inline void box2df_set_finite(BOX2DF *a)
+{
+	if ( ! isfinite(a->xmax) )
+		a->xmax = FLT_MAX;
+	if ( ! isfinite(a->ymax) )
+		a->ymax = FLT_MAX;
+	if ( ! isfinite(a->ymin) )
+		a->ymin = -1*FLT_MAX;
+	if ( ! isfinite(a->xmin) )
+		a->xmin = -1*FLT_MAX;
+	return;
+}
+
 /* Enlarge b_union to contain b_new. If b_new contains more
    dimensions than b_union, expand b_union to contain those dimensions. */
 void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
@@ -202,7 +227,7 @@
 {
 	float result;
 
-	if ( a == NULL )
+	if ( a == NULL || box2df_is_empty(a) )
 		return (float)0.0;
 
 	if ( (a->xmax <= a->xmin) || (a->ymax <= a->ymin) )
@@ -219,7 +244,7 @@
 
 static float box2df_edge(const BOX2DF *a)
 {
-	if ( a == NULL )
+	if ( a == NULL || box2df_is_empty(a) )
 		return (float)0.0;
 
 	return ((a->xmax) - (a->xmin)) + ((a->ymax) - (a->ymin));
@@ -237,10 +262,10 @@
 		return 0.0;
 	}
 
-	if ( a == NULL )
+	if ( a == NULL || box2df_is_empty(a) )
 		return box2df_size(b);
 
-	if ( b == NULL )
+	if ( b == NULL || box2df_is_empty(b) )
 		return box2df_size(a);
 
 	result = ((double)Max(a->xmax,b->xmax) - (double)Min(a->xmin,b->xmin)) *
@@ -264,10 +289,10 @@
 		return 0.0;
 	}
 
-	if ( a == NULL )
+	if ( a == NULL || box2df_is_empty(a) )
 		return box2df_edge(b);
 
-	if ( b == NULL )
+	if ( b == NULL || box2df_is_empty(b) )
 		return box2df_edge(a);
 
 	result = (Max(a->xmax,b->xmax) - Min(a->xmin,b->xmin)) +
@@ -308,6 +333,10 @@
 {
 	float tmp;
 	POSTGIS_DEBUGF(5,"validating box2df (%s)", box2df_to_string(b));
+
+	if ( box2df_is_empty(b) )
+		return;
+
 	if ( b->xmax < b->xmin )
 	{
 		tmp = b->xmin;
@@ -325,7 +354,8 @@
 
 static bool box2df_overlaps(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	if ( (a->xmin > b->xmax) || (b->xmin > a->xmax) ||
 	     (a->ymin > b->ymax) || (b->ymin > a->ymax) )
@@ -338,8 +368,13 @@
 
 bool box2df_contains(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b )
+		return false;
 
+	/* All things can contain EMPTY (except EMPTY) */
+	if ( box2df_is_empty(b) && ! box2df_is_empty(a) )
+		return true;
+
 	if ( (a->xmin > b->xmin) || (a->xmax < b->xmax) ||
 	     (a->ymin > b->ymin) || (a->ymax < b->ymax) )
 	{
@@ -351,33 +386,37 @@
 
 static bool box2df_within(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b )
+		return false;
 
+	/* EMPTY is within all other things (except EMPTY) */
+	if ( box2df_is_empty(a) && ! box2df_is_empty(b) )
+		return true;
+
 	POSTGIS_DEBUG(5, "entered function");
 	return box2df_contains(b,a);
 }
 
 static bool box2df_equals(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( a &&  b ) {
-		if ( (a->xmin != b->xmin) || (a->xmax != b->xmax) ||
-		     (a->ymin != b->ymin) || (a->ymax != b->ymax) )
-		{
-			return false;
-		}
+	if ( !a && !b )
 		return true;
-	} else if ( a || b ) {
-		/* one empty, one not */
+	else if ( !a || !b )
 		return false;
-	} else {
-		/* both empty */
+	else if ( box2df_is_empty(a) && box2df_is_empty(b) )
 		return true;
-	}
+	else if ( box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
+	else if ((a->xmin == b->xmin) && (a->xmax == b->xmax) && (a->ymin == b->ymin) && (a->ymax == b->ymax))
+		return true;
+	else
+		return false;
 }
 
 static bool box2df_overleft(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.xmax <= b.xmax */
 	return a->xmax <= b->xmax;
@@ -385,7 +424,8 @@
 
 static bool box2df_left(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.xmax < b.xmin */
 	return a->xmax < b->xmin;
@@ -393,7 +433,8 @@
 
 static bool box2df_right(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.xmin > b.xmax */
 	return a->xmin > b->xmax;
@@ -401,7 +442,8 @@
 
 static bool box2df_overright(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.xmin >= b.xmin */
 	return a->xmin >= b->xmin;
@@ -409,7 +451,8 @@
 
 static bool box2df_overbelow(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.ymax <= b.ymax */
 	return a->ymax <= b->ymax;
@@ -417,7 +460,8 @@
 
 static bool box2df_below(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.ymax < b.ymin */
 	return a->ymax < b->ymin;
@@ -425,7 +469,8 @@
 
 static bool box2df_above(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.ymin > b.ymax */
 	return a->ymin > b->ymax;
@@ -433,7 +478,8 @@
 
 static bool box2df_overabove(const BOX2DF *a, const BOX2DF *b)
 {
-	if ( ! a || ! b ) return false; /* TODO: might be smarter for EMPTY */
+	if ( !a || !b || box2df_is_empty(a) || box2df_is_empty(b) )
+		return false;
 
 	/* a.ymin >= b.ymin */
 	return a->ymin >= b->ymin;
@@ -955,8 +1001,12 @@
 	/* Is the bounding box valid (non-empty, non-infinite)? If not, return input uncompressed. */
 	if ( result == LW_FAILURE )
 	{
+		box2df_set_empty(&bbox_out);
+		gistentryinit(*entry_out, PointerGetDatum(box2df_copy(&bbox_out)),
+		              entry_in->rel, entry_in->page, entry_in->offset, false);
+
 		POSTGIS_DEBUG(4, "[GIST] empty geometry!");
-		PG_RETURN_POINTER(entry_in);
+		PG_RETURN_POINTER(entry_out);
 	}
 
 	POSTGIS_DEBUGF(4, "[GIST] got entry_in->key: %s", box2df_to_string(&bbox_out));
@@ -965,8 +1015,12 @@
 	if ( ! isfinite(bbox_out.xmax) || ! isfinite(bbox_out.xmin) ||
 	     ! isfinite(bbox_out.ymax) || ! isfinite(bbox_out.ymin) )
 	{
+		box2df_set_finite(&bbox_out);
+		gistentryinit(*entry_out, PointerGetDatum(box2df_copy(&bbox_out)),
+		              entry_in->rel, entry_in->page, entry_in->offset, false);
+
 		POSTGIS_DEBUG(4, "[GIST] infinite geometry!");
-		PG_RETURN_POINTER(entry_in);
+		PG_RETURN_POINTER(entry_out);
 	}
 
 	/* Enure bounding box has minimums below maximums. */

Modified: trunk/postgis/gserialized_gist_nd.c
===================================================================
--- trunk/postgis/gserialized_gist_nd.c	2017-12-13 14:03:52 UTC (rev 16150)
+++ trunk/postgis/gserialized_gist_nd.c	2017-12-13 20:51:36 UTC (rev 16151)
@@ -154,7 +154,7 @@
 /* An "unknown" GIDX is used to represent the bounds of an EMPTY
    geometry or other-wise unindexable geometry (like one with NaN
    or Inf bounds) */
-static inline bool gidx_is_unknown(const GIDX *a)
+inline bool gidx_is_unknown(const GIDX *a)
 {
 	size_t size = VARSIZE(a) - VARHDRSZ;
 	/* "unknown" gidx objects have a too-small size of one float */
@@ -168,6 +168,11 @@
 	SET_VARSIZE(a, VARHDRSZ);
 }
 
+static inline void gidx_set_finite(GIDX *a)
+{
+	SET_VARSIZE(a, VARHDRSZ);
+}
+
 /* Enlarge b_union to contain b_new. If b_new contains more
    dimensions than b_union, expand b_union to contain those dimensions. */
 void gidx_merge(GIDX **b_union, GIDX *b_new)

Modified: trunk/regress/estimatedextent.sql
===================================================================
--- trunk/regress/estimatedextent.sql	2017-12-13 14:03:52 UTC (rev 16150)
+++ trunk/regress/estimatedextent.sql	2017-12-13 20:51:36 UTC (rev 16151)
@@ -187,18 +187,29 @@
 
 drop table p cascade;
 
+--
+-- Index assisted extent generation
+--
+create table test (id serial primary key, geom1 geometry, geom2 geometry);
+create index test_x1 on test using gist (geom1);
+create index test_x2 on test using gist (geom2);
+select '1.a null', _postgis_index_extent('test', 'geom1');
+select '1.b null', _postgis_index_extent('test', 'geom2');
+insert into test (geom1, geom2) select NULL, NULL;
+insert into test (geom1, geom2) select 'POINT EMPTY', 'LINESTRING EMPTY';
+select '2.a null', _postgis_index_extent('test', 'geom1');
+select '2.b null', _postgis_index_extent('test', 'geom2');
+insert into test (geom1, geom2) select 'POINT EMPTY', 'LINESTRING EMPTY' from generate_series(0,1024);
+select '3.a null', _postgis_index_extent('test', 'geom1');
+select '3.b null', _postgis_index_extent('test', 'geom2');
+insert into test (geom1, geom2) select st_makepoint(s, s), st_makepoint(2*s, 2*s) from generate_series(-100,100) s;
+select '4.a box',_postgis_index_extent('test', 'geom1');
+select '4.b box',_postgis_index_extent('test', 'geom2');
+delete from test;
+select '5.a bad-box',_postgis_index_extent('test', 'geom1');
+select '5.b bad-box',_postgis_index_extent('test', 'geom2');
+vacuum full test;
+select '6.a null', _postgis_index_extent('test', 'geom1');
+select '6.b null', _postgis_index_extent('test', 'geom2');
+drop table test cascade;
 
--- create table test (id serial primary key, geom1 geometry, geom2 geometry);
--- create index test_x1 on test using gist (geom1);
--- create index test_x2 on test using gist (geom2);
--- select _postgis_gserialized_index_extent('test', 'geom1');
--- select _postgis_gserialized_index_extent('test', 'geom2');
--- insert into test (geom1, geom2) select NULL, NULL;
--- insert into test (geom1, geom2) select 'POINT EMPTY', 'LINESTRING EMPTY';
--- insert into test (geom1, geom2) select 'POINT EMPTY', 'LINESTRING EMPTY';
--- select _postgis_gserialized_index_extent('test', 'geom1');
--- select _postgis_gserialized_index_extent('test', 'geom2');
--- insert into test (geom1, geom2) select st_makepoint(s, s), st_makepoint(2*s, 2*s) from generate_series(-100,100) s;
--- select _postgis_gserialized_index_extent('test', 'geom1');
--- select _postgis_gserialized_index_extent('test', 'geom2');
-

Modified: trunk/regress/estimatedextent_expected
===================================================================
--- trunk/regress/estimatedextent_expected	2017-12-13 14:03:52 UTC (rev 16150)
+++ trunk/regress/estimatedextent_expected	2017-12-13 20:51:36 UTC (rev 16151)
@@ -39,3 +39,15 @@
 #3391.19|0.00|1.00|0.00|1.00
 #3391.20|0.00|1.00|0.00|1.00
 NOTICE:  drop cascades to 2 other objects
+1.a null|
+1.b null|
+2.a null|
+2.b null|
+3.a null|
+3.b null|
+4.a box|BOX(-100 -100,100 100)
+4.b box|BOX(-200 -200,200 200)
+5.a bad-box|BOX(-100 -100,100 100)
+5.b bad-box|BOX(-200 -200,200 200)
+6.a null|
+6.b null|



More information about the postgis-tickets mailing list