[postgis-tickets] r17070 - Stabilize GiST ND indexes for mixed dimensions

Darafei komzpa at gmail.com
Sat Nov 24 01:30:36 PST 2018


Author: komzpa
Date: 2018-11-24 01:30:36 -0800 (Sat, 24 Nov 2018)
New Revision: 17070

Modified:
   trunk/NEWS
   trunk/postgis/gserialized_gist_nd.c
   trunk/regress/core/Makefile.in
   trunk/regress/core/regress_gist_index_nd.sql
   trunk/regress/core/regress_gist_index_nd_expected
Log:
Stabilize GiST ND indexes for mixed dimensions

Patch by Darafei Praliaskouski

Thanks to 
 Arthur Lesuisse for synthesizing test case, 
 Andrew Gierth for finding runaway memcpy,
 Raúl Marín for pointing to memory problem in index construction.

This is not backpatchable to 2.x, that requires separate solution.

References #4139
Closes https://github.com/postgis/postgis/pull/336


Modified: trunk/NEWS
===================================================================
--- trunk/NEWS	2018-11-24 05:35:03 UTC (rev 17069)
+++ trunk/NEWS	2018-11-24 09:30:36 UTC (rev 17070)
@@ -43,6 +43,8 @@
   - #4249, Fix undefined behaviour in raster intersection (Raúl Marín)
   - #4246, Fix undefined behaviour in ST_3DDistance (Raúl Marín)
   - #4244, Avoid unaligned memory access in BOX2D_out (Raúl Marín)
+  - #4139, Make mixed-dimension ND index build tree correctly (Darafei Praliaskouski,
+           Arthur Lesuisse, Andrew Gierth, Raúl Marín)
 
 PostGIS 2.5.0
 2018/09/23

Modified: trunk/postgis/gserialized_gist_nd.c
===================================================================
--- trunk/postgis/gserialized_gist_nd.c	2018-11-24 05:35:03 UTC (rev 17069)
+++ trunk/postgis/gserialized_gist_nd.c	2018-11-24 09:30:36 UTC (rev 17070)
@@ -154,8 +154,7 @@
 	SET_VARSIZE(a, VARHDRSZ);
 }
 
-/* Enlarge b_union to contain b_new. If b_new contains more
-   dimensions than b_union, expand b_union to contain those dimensions. */
+/* Enlarge b_union to contain b_new. */
 void
 gidx_merge(GIDX **b_union, GIDX *b_new)
 {
@@ -165,10 +164,12 @@
 	Assert(b_new);
 
 	/* Can't merge an unknown into any thing */
+	/* Q: Unknown is 0 dimensions. Should we reset result to unknown instead? (ticket #4232) */
 	if (gidx_is_unknown(b_new))
 		return;
 
 	/* Merge of unknown and known is known */
+	/* Q: Unknown is 0 dimensions. Should we never modify unknown instead? (ticket #4232) */
 	if (gidx_is_unknown(*b_union))
 	{
 		*b_union = b_new;
@@ -178,17 +179,16 @@
 	dims_union = GIDX_NDIMS(*b_union);
 	dims_new = GIDX_NDIMS(b_new);
 
-	POSTGIS_DEBUGF(4, "merging gidx (%s) into gidx (%s)", gidx_to_string(b_new), gidx_to_string(*b_union));
-
-	if (dims_new > dims_union)
+	/* Shrink unshared dimensions.
+	 * Unset dimension is essentially [-FLT_MAX, FLT_MAX], so we can either trim it or reset to that range.*/
+	if (dims_new < dims_union)
 	{
-		POSTGIS_DEBUGF(5, "reallocating b_union from %d dims to %d dims", dims_union, dims_new);
 		*b_union = (GIDX *)repalloc(*b_union, GIDX_SIZE(dims_new));
 		SET_VARSIZE(*b_union, VARSIZE(b_new));
 		dims_union = dims_new;
 	}
 
-	for (i = 0; i < dims_new; i++)
+	for (i = 0; i < dims_union; i++)
 	{
 		/* Adjust minimums */
 		GIDX_SET_MIN(*b_union, i, Min(GIDX_GET_MIN(*b_union, i), GIDX_GET_MIN(b_new, i)));
@@ -195,9 +195,6 @@
 		/* Adjust maximums */
 		GIDX_SET_MAX(*b_union, i, Max(GIDX_GET_MAX(*b_union, i), GIDX_GET_MAX(b_new, i)));
 	}
-
-	POSTGIS_DEBUGF(5, "merge complete (%s)", gidx_to_string(*b_union));
-	return;
 }
 
 /* Calculate the volume (in n-d units) of the GIDX */
@@ -1071,7 +1068,7 @@
 		retval = false;
 	}
 
-	return (retval);
+	return retval;
 }
 
 /*
@@ -1105,7 +1102,7 @@
 		retval = false;
 	}
 
-	return (retval);
+	return retval;
 }
 
 /*
@@ -1426,7 +1423,11 @@
 	if (*pos)
 		gidx_merge(box_union, box_current);
 	else
-		memcpy((void *)(*box_union), (void *)box_current, VARSIZE(box_current));
+	{
+		pfree(*box_union);
+		*box_union = gidx_copy(box_current);
+	}
+
 	list[*pos] = num;
 	(*pos)++;
 }
@@ -1721,7 +1722,7 @@
 	** sides of the page union box can occasionally all end up in one place, leading
 	** to this condition.
 	*/
-	if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion) == true)
+	if (gserialized_gist_picksplit_badratios(pos, ndims_pageunion))
 	{
 		/*
 		** Instead we split on center points and see if we do better.

Modified: trunk/regress/core/Makefile.in
===================================================================
--- trunk/regress/core/Makefile.in	2018-11-24 05:35:03 UTC (rev 17069)
+++ trunk/regress/core/Makefile.in	2018-11-24 09:30:36 UTC (rev 17070)
@@ -107,6 +107,7 @@
 	quantize_coordinates \
 	regress \
 	regress_bdpoly \
+	regress_gist_index_nd \
 	regress_index \
 	regress_index_nulls \
 	regress_management \
@@ -189,7 +190,6 @@
 	delaunaytriangles
 
 
-
 ifeq ($(INTERRUPTTESTS),yes)
 	# Allow CI servers to configure --with-interrupt-tests
 	TESTS += \
@@ -234,8 +234,7 @@
 	TESTS += \
 	regress_spgist_index_2d \
 	regress_spgist_index_3d \
-	regress_spgist_index_nd #\
-#	regress_gist_index_nd
+	regress_spgist_index_nd
 endif
 
 ifeq ($(HAVE_PROTOBUF),yes)

Modified: trunk/regress/core/regress_gist_index_nd.sql
===================================================================
--- trunk/regress/core/regress_gist_index_nd.sql	2018-11-24 05:35:03 UTC (rev 17069)
+++ trunk/regress/core/regress_gist_index_nd.sql	2018-11-24 09:30:36 UTC (rev 17070)
@@ -85,5 +85,4 @@
 
 DROP TABLE tbl_geomcollection_nd CASCADE;
 DROP TABLE test_gist_idx_nd CASCADE;
-DROP FUNCTION qnodes;
-
+DROP FUNCTION qnodes (text);

Modified: trunk/regress/core/regress_gist_index_nd_expected
===================================================================
--- trunk/regress/core/regress_gist_index_nd_expected	2018-11-24 05:35:03 UTC (rev 17069)
+++ trunk/regress/core/regress_gist_index_nd_expected	2018-11-24 09:30:36 UTC (rev 17070)
@@ -1,4 +1,4 @@
 &&&|180502|Seq Scan|180502|Index Scan
-~~ |35498|Seq Scan|35498|Index Scan
-@@ |35498|Seq Scan|35498|Index Scan
+~~ |39682|Seq Scan|39682|Index Scan
+@@ |39682|Seq Scan|39682|Index Scan
 ~~=|480|Seq Scan|480|Index Scan



More information about the postgis-tickets mailing list