[postgis-tickets] r17565 - Rewrite GiST penalty.

Darafei komzpa at gmail.com
Fri Jun 28 03:00:19 PDT 2019


Author: komzpa
Date: 2019-06-28 03:00:18 -0700 (Fri, 28 Jun 2019)
New Revision: 17565

Modified:
   trunk/.clang-format
   trunk/NEWS
   trunk/postgis/gserialized_gist_2d.c
   trunk/postgis/gserialized_gist_nd.c
Log:
Rewrite GiST penalty.

Use value of 0 as often as possible, as it's special code path in Postgres.
Fix picksplit to use correct penalty function.
Drop dead code.

Closes #4441
Closes https://github.com/postgis/postgis/pull/425




Modified: trunk/.clang-format
===================================================================
--- trunk/.clang-format	2019-06-28 09:42:49 UTC (rev 17564)
+++ trunk/.clang-format	2019-06-28 10:00:18 UTC (rev 17565)
@@ -26,7 +26,7 @@
   AfterFunction:   true
   AfterNamespace:  false
   AfterObjCDeclaration: false
-  AfterStruct:     true
+  AfterStruct:     false
   AfterUnion:      false
   AfterExternBlock: false
   BeforeCatch:     true

Modified: trunk/NEWS
===================================================================
--- trunk/NEWS	2019-06-28 09:42:49 UTC (rev 17564)
+++ trunk/NEWS	2019-06-28 10:00:18 UTC (rev 17565)
@@ -183,6 +183,8 @@
   - #4327, Avoid pfree'ing the result of getenv (Raúl Marín)
   - #4406, Throw on invalid characters when decoding geohash (Raúl Marín)
   - #4394, Allow FULL OUTER JOIN on geometry (Darafei Praliaskouski)
+  - #4441, Make GiST penalty friendly to multi-column indexes and build single-column 
+           ones faster. (Darafei Praliaskouski)
 
 
 PostGIS 2.5.0

Modified: trunk/postgis/gserialized_gist_2d.c
===================================================================
--- trunk/postgis/gserialized_gist_2d.c	2019-06-28 09:42:49 UTC (rev 17564)
+++ trunk/postgis/gserialized_gist_2d.c	2019-06-28 10:00:18 UTC (rev 17565)
@@ -19,11 +19,10 @@
  **********************************************************************
  *
  * Copyright 2009 Paul Ramsey <pramsey at cleverelephant.ca>
- * Copyright 2017 Darafei Praliaskouski <me at komzpa.net>
+ * Copyright 2017-2019 Darafei Praliaskouski <me at komzpa.net>
  *
  **********************************************************************/
 
-
 /*
 ** R-Tree Bibliography
 **
@@ -50,7 +49,6 @@
 #include "lwgeom_pg.h"       /* For debugging macros. */
 #include "gserialized_gist.h"	     /* For utility functions. */
 
-
 #include <float.h> /* For FLT_MAX */
 #include <math.h>
 
@@ -61,12 +59,6 @@
 #define LIMIT_RATIO 0.1
 
 /*
-** 0 == don't use it
-** 1 == use it
-*/
-#define KOROTKOV_SPLIT 1
-
-/*
 ** For debugging
 */
 #if POSTGIS_DEBUG_LEVEL > 0
@@ -170,8 +162,7 @@
 	return;
 }
 
-/* Enlarge b_union to contain b_new. If b_new contains more
-   dimensions than b_union, expand b_union to contain those dimensions. */
+/* Enlarge b_union to contain b_new. */
 void box2df_merge(BOX2DF *b_union, BOX2DF *b_new)
 {
 
@@ -191,108 +182,7 @@
 	return;
 }
 
-#if KOROTKOV_SPLIT < 1
-static bool box2df_intersection(const BOX2DF *a, const BOX2DF *b, BOX2DF *n)
-{
-	POSTGIS_DEBUGF(5, "calculating intersection of %s with %s", box2df_to_string(a), box2df_to_string(b));
 
-	if( a == NULL || b == NULL || n == NULL )
-		return false;
-
-	n->xmax = Min(a->xmax, b->xmax);
-	n->ymax = Min(a->ymax, b->ymax);
-	n->xmin = Max(a->xmin, b->xmin);
-	n->ymin = Max(a->ymin, b->ymin);
-
-	POSTGIS_DEBUGF(5, "intersection is %s", box2df_to_string(n));
-
-	if ( (n->xmax < n->xmin) || (n->ymax < n->ymin) )
-		return false;
-
-	return true;
-}
-#endif
-
-static float box2df_size(const BOX2DF *a)
-{
-	float result;
-
-	if ( a == NULL || box2df_is_empty(a) )
-		return (float)0.0;
-
-	if ( (a->xmax <= a->xmin) || (a->ymax <= a->ymin) )
-	{
-		result =  (float) 0.0;
-	}
-	else
-	{
-		result = (((double) a->xmax)-((double) a->xmin)) * (((double) a->ymax)-((double) a->ymin));
-	}
-
-	return result;
-}
-
-static float box2df_edge(const BOX2DF *a)
-{
-	if ( a == NULL || box2df_is_empty(a) )
-		return (float)0.0;
-
-	return ((a->xmax) - (a->xmin)) + ((a->ymax) - (a->ymin));
-}
-
-static float box2df_union_size(const BOX2DF *a, const BOX2DF *b)
-{
-	float result;
-
-	POSTGIS_DEBUG(5,"entered function");
-
-	if ( a == NULL && b == NULL )
-	{
-		elog(ERROR, "box2df_union_size received two null arguments");
-		return 0.0;
-	}
-
-	if ( a == NULL || box2df_is_empty(a) )
-		return box2df_size(b);
-
-	if ( b == NULL || box2df_is_empty(b) )
-		return box2df_size(a);
-
-	result = ((double)Max(a->xmax,b->xmax) - (double)Min(a->xmin,b->xmin)) *
- 	         ((double)Max(a->ymax,b->ymax) - (double)Min(a->ymin,b->ymin));
-
-	POSTGIS_DEBUGF(5, "union size of %s and %s is %.8g", box2df_to_string(a), box2df_to_string(b), result);
-
-	return result;
-}
-
-
-static float box2df_union_edge(const BOX2DF *a, const BOX2DF *b)
-{
-	float result;
-
-	POSTGIS_DEBUG(5,"entered function");
-
-	if ( a == NULL && b == NULL )
-	{
-		elog(ERROR, "box2df_union_edge received two null arguments");
-		return 0.0;
-	}
-
-	if ( a == NULL || box2df_is_empty(a) )
-		return box2df_edge(b);
-
-	if ( b == NULL || box2df_is_empty(b) )
-		return box2df_edge(a);
-
-	result = (Max(a->xmax,b->xmax) - Min(a->xmin,b->xmin)) +
- 	         (Max(a->ymax,b->ymax) - Min(a->ymin,b->ymin));
-
-	POSTGIS_DEBUGF(5, "union edge of %s and %s is %.8g", box2df_to_string(a), box2df_to_string(b), result);
-
-	return result;
-}
-
 /* Convert a double-based GBOX into a float-based BOX2DF,
    ensuring the float box is larger than the double box */
 static inline int box2df_from_gbox_p(GBOX *box, BOX2DF *a)
@@ -315,7 +205,7 @@
 }
 
 /***********************************************************************
-** BOX3DF tests for 2D index operators.
+** BOX2DF tests for 2D index operators.
 */
 
 /* Ensure all minimums are below maximums. */
@@ -322,7 +212,6 @@
 inline void box2df_validate(BOX2DF *b)
 {
 	float tmp;
-	POSTGIS_DEBUGF(5,"validating box2df (%s)", box2df_to_string(b));
 
 	if ( box2df_is_empty(b) )
 		return;
@@ -1227,33 +1116,68 @@
 }
 
 /*
-** Function to pack floats of different realms
-** This function serves to pack bit flags inside float type
-** Resulted value represent can be from four different "realms"
-** Every value from realm 3 is greater than any value from realms 2, 1 and 0.
-** Every value from realm 2 is less than every value from realm 3 and greater
-** than any value from realm 1 and 0, and so on. Values from the same realm
-** loose two bits of precision. This technique is possible due to floating
-** point numbers specification according to IEEE 754: exponent bits are highest
+** Function to pack floats of different realms.
+** This function serves to pack bit flags inside float type.
+** Result value represent can be from two different "realms".
+** Every value from realm 1 is greater than any value from realm 0.
+** Values from the same realm loose one bit of precision.
+**
+** This technique is possible due to floating point numbers specification
+** according to IEEE 754: exponent bits are highest
 ** (excluding sign bits, but here penalty is always positive). If float a is
 ** greater than float b, integer A with same bit representation as a is greater
 ** than integer B with same bits as b.
 */
-static float pack_float(const float value, const int realm)
+static inline float
+pack_float(const float value, const uint8_t realm)
 {
-  union {
-    float f;
-    struct { unsigned value:31, sign:1; } vbits;
-    struct { unsigned value:29, realm:2, sign:1; } rbits;
-  } a;
+	union {
+		float f;
+		struct {
+			unsigned value : 31, sign : 1;
+		} vbits;
+		struct {
+			unsigned value : 30, realm : 1, sign : 1;
+		} rbits;
+	} a;
 
-  a.f = value;
-  a.rbits.value = a.vbits.value >> 2;
-  a.rbits.realm = realm;
+	a.f = value;
+	a.rbits.value = a.vbits.value >> 1;
+	a.rbits.realm = realm;
 
-  return a.f;
+	return a.f;
 }
 
+static inline float
+box2df_penalty(const BOX2DF *b1, const BOX2DF *b2)
+{
+	float b1xmin = b1->xmin, b1xmax = b1->xmax;
+	float b1ymin = b1->ymin, b1ymax = b1->ymax;
+	float b2xmin = b2->xmin, b2xmax = b2->xmax;
+	float b2ymin = b2->ymin, b2ymax = b2->ymax;
+
+	float box_union_xmin = Min(b1xmin, b2xmin), box_union_xmax = Max(b1xmax, b2xmax);
+	float box_union_ymin = Min(b1ymin, b2ymin), box_union_ymax = Max(b1ymax, b2ymax);
+
+	float b1dx = b1xmax - b1xmin, b1dy = b1ymax - b1ymin;
+	float box_union_dx = box_union_xmax - box_union_xmin, box_union_dy = box_union_ymax - box_union_ymin;
+
+	float box_union_area = box_union_dx * box_union_dy, box1area = b1dx * b1dy;
+	float box_union_edge = box_union_dx + box_union_dy, box1edge = b1dx + b1dy;
+
+	float area_extension = box_union_area - box1area;
+	float edge_extension = box_union_edge - box1edge;
+
+	/* REALM 1: Area extension is nonzero, return it */
+	if (area_extension > FLT_EPSILON)
+		return pack_float(area_extension, 1);
+	/* REALM 0: Area extension is zero, return nonzero edge extension */
+	else if (edge_extension > FLT_EPSILON)
+		return pack_float(edge_extension, 0);
+
+	return 0;
+}
+
 /*
 ** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry.
 ** Calculate the change in volume of the old entry once the new entry is added.
@@ -1264,60 +1188,20 @@
 	GISTENTRY *origentry = (GISTENTRY*) PG_GETARG_POINTER(0);
 	GISTENTRY *newentry = (GISTENTRY*) PG_GETARG_POINTER(1);
 	float *result = (float*) PG_GETARG_POINTER(2);
-	BOX2DF *gbox_index_orig, *gbox_index_new;
-	float size_union, size_orig, edge_union, edge_orig;
+	BOX2DF *b1, *b2;
 
-	POSTGIS_DEBUG(4, "[GIST] 'penalty' function called");
+	b1 = (BOX2DF *)DatumGetPointer(origentry->key);
+	b2 = (BOX2DF *)DatumGetPointer(newentry->key);
 
-	gbox_index_orig = (BOX2DF*)DatumGetPointer(origentry->key);
-	gbox_index_new = (BOX2DF*)DatumGetPointer(newentry->key);
+	/* Penalty value of 0 has special code path in Postgres's gistchoose.
+	 * It is used as an early exit condition in subtree loop, allowing faster tree descend.
+	 * For multi-column index, it lets next column break the tie, possibly more confidently.
+	 */
+	*result = 0;
 
-	/* Drop out if we're dealing with null inputs. Shouldn't happen. */
-	if ( (gbox_index_orig == NULL) && (gbox_index_new == NULL) )
-	{
-		POSTGIS_DEBUG(4, "[GIST] both inputs NULL! returning penalty of zero");
-		*result = 0.0;
-		PG_RETURN_FLOAT8(*result);
-	}
+	if (b1 && b2 && !box2df_is_empty(b1) && !box2df_is_empty(b2))
+		*result = box2df_penalty(b1, b2);
 
-	/* Calculate the size difference of the boxes. */
-	size_union = box2df_union_size(gbox_index_orig, gbox_index_new);
-	size_orig = box2df_size(gbox_index_orig);
-	*result = size_union - size_orig;
-
-	/* REALM 0: No extension is required, volume is zero, return edge */
- 	/* REALM 1: No extension is required, return nonzero area */
- 	/* REALM 2: Area extension is zero, return nonzero edge extension */
- 	/* REALM 3: Area extension is nonzero, return it */
-
- 	if( *result == 0 )
- 	{
-		if (size_orig > 0)
-		{
-			*result = pack_float(size_orig, 1); /* REALM 1 */
-		}
-		else
-		{
-			edge_union = box2df_union_edge(gbox_index_orig, gbox_index_new);
-			edge_orig = box2df_edge(gbox_index_orig);
- 			*result = edge_union - edge_orig;
- 			if( *result == 0 )
-	 		{
-	 			*result = pack_float(edge_orig, 0); /* REALM 0 */
- 			}
- 			else
- 			{
- 				*result = pack_float(*result, 2); /* REALM 2 */
- 			}
-		}
- 	}
- 	else
- 	{
- 		*result = pack_float(*result, 3); /* REALM 3 */
- 	}
-
-	POSTGIS_DEBUGF(4, "[GIST] 'penalty', union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result);
-
 	PG_RETURN_POINTER(result);
 }
 
@@ -1371,7 +1255,6 @@
 	PG_RETURN_POINTER(result);
 }
 
-#if KOROTKOV_SPLIT > 0
 /*
  * Adjust BOX2DF b boundaries with insertion of addon.
  */
@@ -1684,23 +1567,6 @@
 }
 
 /*
- * Return increase of original BOX2DF area by new BOX2DF area insertion.
- */
-static float
-box_penalty(BOX2DF *original, BOX2DF *new)
-{
-	float		union_width,
-				union_height;
-
-	union_width = Max(original->xmax, new->xmax) -
-		Min(original->xmin, new->xmin);
-	union_height = Max(original->ymax, new->ymax) -
-		Min(original->ymin, new->ymin);
-	return union_width * union_height - (original->xmax - original->xmin) *
-		(original->ymax - original->ymin);
-}
-
-/*
  * Compare common entries by their deltas.
  */
 static int
@@ -2059,8 +1925,7 @@
 		{
 			box = (BOX2DF *) DatumGetPointer(entryvec->vector[
 												commonEntries[i].index].key);
-			commonEntries[i].delta = Abs(box_penalty(leftBox, box) -
-										 box_penalty(rightBox, box));
+			commonEntries[i].delta = Abs(box2df_penalty(leftBox, box) - box2df_penalty(rightBox, box));
 		}
 
 		/*
@@ -2088,7 +1953,7 @@
 			else
 			{
 				/* Otherwise select the group by minimal penalty */
-				if (box_penalty(leftBox, box) < box_penalty(rightBox, box))
+				if (box2df_penalty(leftBox, box) < box2df_penalty(rightBox, box))
 					PLACE_LEFT(box, commonEntries[i].index);
 				else
 					PLACE_RIGHT(box, commonEntries[i].index);
@@ -2103,269 +1968,8 @@
 	PG_RETURN_POINTER(v);
 }
 
-#else /* !KOROTOV_SPLIT */
-
-typedef struct
-{
-	BOX2DF *key;
-	int pos;
-}
-KBsort;
-
-static int
-compare_KB(const void* a, const void* b)
-{
-	BOX2DF *abox = ((KBsort*)a)->key;
-	BOX2DF *bbox = ((KBsort*)b)->key;
-	float sa = (abox->xmax - abox->xmin) * (abox->ymax - abox->ymin);
-	float sb = (bbox->xmax - bbox->xmin) * (bbox->ymax - bbox->ymin);
-
-	if ( sa==sb ) return 0;
-	return ( sa>sb ) ? 1 : -1;
-}
-
-/**
-** The GiST PickSplit method
-** New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree',
-** C.H.Ang and T.C.Tan
-*/
-PG_FUNCTION_INFO_V1(gserialized_gist_picksplit_2d);
-Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
-{
-	GistEntryVector	*entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
-
-	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
-	OffsetNumber i;
-	OffsetNumber *listL, *listR, *listB, *listT;
-	BOX2DF *unionL, *unionR, *unionB, *unionT;
-	int posL, posR, posB, posT;
-	BOX2DF pageunion;
-	BOX2DF *cur;
-	char direction = ' ';
-	bool allisequal = true;
-	OffsetNumber maxoff;
-	int nbytes;
-
-	POSTGIS_DEBUG(3, "[GIST] 'picksplit' entered");
-
-	posL = posR = posB = posT = 0;
-
-	maxoff = entryvec->n - 1;
-	cur = (BOX2DF*) DatumGetPointer(entryvec->vector[FirstOffsetNumber].key);
-
-	memcpy((void *) &pageunion, (void *) cur, sizeof(BOX2DF));
-
-	/* find MBR */
-	for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i))
-	{
-		cur = (BOX2DF *) DatumGetPointer(entryvec->vector[i].key);
-
-		if ( allisequal == true &&  (
-		            pageunion.xmax != cur->xmax ||
-		            pageunion.ymax != cur->ymax ||
-		            pageunion.xmin != cur->xmin ||
-		            pageunion.ymin != cur->ymin
-		        ) )
-			allisequal = false;
-
-		if (pageunion.xmax < cur->xmax)
-			pageunion.xmax = cur->xmax;
-		if (pageunion.xmin > cur->xmin)
-			pageunion.xmin = cur->xmin;
-		if (pageunion.ymax < cur->ymax)
-			pageunion.ymax = cur->ymax;
-		if (pageunion.ymin > cur->ymin)
-			pageunion.ymin = cur->ymin;
-	}
-
-	POSTGIS_DEBUGF(4, "pageunion is %s", box2df_to_string(&pageunion));
-
-	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
-	listL = (OffsetNumber *) palloc(nbytes);
-	listR = (OffsetNumber *) palloc(nbytes);
-	unionL = (BOX2DF *) palloc(sizeof(BOX2DF));
-	unionR = (BOX2DF *) palloc(sizeof(BOX2DF));
-
-	if (allisequal)
-	{
-		POSTGIS_DEBUG(4, " AllIsEqual!");
-
-		cur = (BOX2DF*) DatumGetPointer(entryvec->vector[OffsetNumberNext(FirstOffsetNumber)].key);
-
-
-		if (memcmp((void *) cur, (void *) &pageunion, sizeof(BOX2DF)) == 0)
-		{
-			v->spl_left = listL;
-			v->spl_right = listR;
-			v->spl_nleft = v->spl_nright = 0;
-			memcpy((void *) unionL, (void *) &pageunion, sizeof(BOX2DF));
-			memcpy((void *) unionR, (void *) &pageunion, sizeof(BOX2DF));
-
-			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
-			{
-				if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
-				{
-					v->spl_left[v->spl_nleft] = i;
-					v->spl_nleft++;
-				}
-				else
-				{
-					v->spl_right[v->spl_nright] = i;
-					v->spl_nright++;
-				}
-			}
-			v->spl_ldatum = PointerGetDatum(unionL);
-			v->spl_rdatum = PointerGetDatum(unionR);
-
-			PG_RETURN_POINTER(v);
-		}
-	}
-
-	listB = (OffsetNumber *) palloc(nbytes);
-	listT = (OffsetNumber *) palloc(nbytes);
-	unionB = (BOX2DF *) palloc(sizeof(BOX2DF));
-	unionT = (BOX2DF *) palloc(sizeof(BOX2DF));
-
-#define ADDLIST( list, unionD, pos, num ) do { \
-	if ( pos ) { \
-		if ( unionD->xmax < cur->xmax )    unionD->xmax	= cur->xmax; \
-		if ( unionD->xmin	> cur->xmin  ) unionD->xmin	= cur->xmin; \
-		if ( unionD->ymax < cur->ymax )    unionD->ymax	= cur->ymax; \
-		if ( unionD->ymin	> cur->ymin  ) unionD->ymin	= cur->ymin; \
-	} else { \
-			memcpy( (void*)unionD, (void*) cur, sizeof( BOX2DF ) );  \
-	} \
-	list[pos] = num; \
-	(pos)++; \
-} while(0)
-
-	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
-	{
-		cur = (BOX2DF*) DatumGetPointer(entryvec->vector[i].key);
-
-		if (cur->xmin - pageunion.xmin < pageunion.xmax - cur->xmax)
-			ADDLIST(listL, unionL, posL,i);
-		else
-			ADDLIST(listR, unionR, posR,i);
-		if (cur->ymin - pageunion.ymin < pageunion.ymax - cur->ymax)
-			ADDLIST(listB, unionB, posB,i);
-		else
-			ADDLIST(listT, unionT, posT,i);
-	}
-
-	POSTGIS_DEBUGF(4, "unionL is %s", box2df_to_string(unionL));
-	POSTGIS_DEBUGF(4, "unionR is %s", box2df_to_string(unionR));
-	POSTGIS_DEBUGF(4, "unionT is %s", box2df_to_string(unionT));
-	POSTGIS_DEBUGF(4, "unionB is %s", box2df_to_string(unionB));
-
-	/* bad disposition, sort by ascending and resplit */
-	if ( (posR==0 || posL==0) && (posT==0 || posB==0) )
-	{
-		KBsort *arr = (KBsort*)palloc( sizeof(KBsort) * maxoff );
-		posL = posR = posB = posT = 0;
-		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
-		{
-			arr[i-1].key = (BOX2DF*) DatumGetPointer(entryvec->vector[i].key);
-			arr[i-1].pos = i;
-		}
-		qsort( arr, maxoff, sizeof(KBsort), compare_KB );
-		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
-		{
-			cur = arr[i-1].key;
-			if (cur->xmin - pageunion.xmin < pageunion.xmax - cur->xmax)
-				ADDLIST(listL, unionL, posL,arr[i-1].pos);
-			else if ( cur->xmin - pageunion.xmin == pageunion.xmax - cur->xmax )
-			{
-				if ( posL>posR )
-					ADDLIST(listR, unionR, posR,arr[i-1].pos);
-				else
-					ADDLIST(listL, unionL, posL,arr[i-1].pos);
-			}
-			else
-				ADDLIST(listR, unionR, posR,arr[i-1].pos);
-
-			if (cur->ymin - pageunion.ymin < pageunion.ymax - cur->ymax)
-				ADDLIST(listB, unionB, posB,arr[i-1].pos);
-			else if ( cur->ymin - pageunion.ymin == pageunion.ymax - cur->ymax )
-			{
-				if ( posB>posT )
-					ADDLIST(listT, unionT, posT,arr[i-1].pos);
-				else
-					ADDLIST(listB, unionB, posB,arr[i-1].pos);
-			}
-			else
-				ADDLIST(listT, unionT, posT,arr[i-1].pos);
-		}
-		pfree(arr);
-	}
-
-	/* which split more optimal? */
-	if (Max(posL, posR) < Max(posB, posT))
-		direction = 'x';
-	else if (Max(posL, posR) > Max(posB, posT))
-		direction = 'y';
-	else
-	{
-		float sizeLR, sizeBT;
-		BOX2DF interLR, interBT;
-
-		if ( box2df_intersection(unionL, unionR, &interLR) == false )
-			sizeLR = 0.0;
-		else
-			sizeLR = box2df_size(&interLR);
-
-		if ( box2df_intersection(unionB, unionT, &interBT) == false )
-			sizeBT = 0.0;
-		else
-			sizeBT = box2df_size(&interBT);
-
-		if (sizeLR < sizeBT)
-			direction = 'x';
-		else
-			direction = 'y';
-	}
-
-	POSTGIS_DEBUGF(4, "split direction '%c'", direction);
-
-	if (direction == 'x')
-	{
-		pfree(unionB);
-		pfree(listB);
-		pfree(unionT);
-		pfree(listT);
-
-		v->spl_left = listL;
-		v->spl_right = listR;
-		v->spl_nleft = posL;
-		v->spl_nright = posR;
-		v->spl_ldatum = PointerGetDatum(unionL);
-		v->spl_rdatum = PointerGetDatum(unionR);
-	}
-	else
-	{
-		pfree(unionR);
-		pfree(listR);
-		pfree(unionL);
-		pfree(listL);
-
-		v->spl_left = listB;
-		v->spl_right = listT;
-		v->spl_nleft = posB;
-		v->spl_nright = posT;
-		v->spl_ldatum = PointerGetDatum(unionB);
-		v->spl_rdatum = PointerGetDatum(unionT);
-	}
-
-	POSTGIS_DEBUG(4, "[GIST] 'picksplit' completed");
-
-	PG_RETURN_POINTER(v);
-}
-
-#endif
-
-
 /*
-** The BOX32DF key must be defined as a PostgreSQL type, even though it is only
+** The BOX2DF key must be defined as a PostgreSQL type, even though it is only
 ** ever used internally. These no-op stubs are used to bind the type.
 */
 PG_FUNCTION_INFO_V1(box2df_in);

Modified: trunk/postgis/gserialized_gist_nd.c
===================================================================
--- trunk/postgis/gserialized_gist_nd.c	2019-06-28 09:42:49 UTC (rev 17564)
+++ trunk/postgis/gserialized_gist_nd.c	2019-06-28 10:00:18 UTC (rev 17565)
@@ -19,7 +19,7 @@
  **********************************************************************
  *
  * Copyright 2009 Paul Ramsey <pramsey at cleverelephant.ca>
- * Copyright 2017 Darafei Praliaskouski <me at komzpa.net>
+ * Copyright 2017-2019 Darafei Praliaskouski <me at komzpa.net>
  *
  **********************************************************************/
 
@@ -1134,35 +1134,33 @@
 }
 
 /*
-** Function to pack floats of different realms
-** This function serves to pack bit flags inside float type
-** Resulted value represent can be from four different "realms"
-** Every value from realm 3 is greater than any value from realms 2, 1 and 0.
-** Every value from realm 2 is less than every value from realm 3 and greater
-** than any value from realm 1 and 0, and so on. Values from the same realm
-** loose two bits of precision. This technique is possible due to floating
-** point numbers specification according to IEEE 754: exponent bits are highest
+** Function to pack floats of different realms.
+** This function serves to pack bit flags inside float type.
+** Result value represent can be from two different "realms".
+** Every value from realm 1 is greater than any value from realm 0.
+** Values from the same realm loose one bit of precision.
+**
+** This technique is possible due to floating point numbers specification
+** according to IEEE 754: exponent bits are highest
 ** (excluding sign bits, but here penalty is always positive). If float a is
 ** greater than float b, integer A with same bit representation as a is greater
 ** than integer B with same bits as b.
 */
-static float
-pack_float(const float value, const int realm)
+static inline float
+pack_float(const float value, const uint8_t realm)
 {
 	union {
 		float f;
-		struct
-		{
+		struct {
 			unsigned value : 31, sign : 1;
 		} vbits;
-		struct
-		{
-			unsigned value : 29, realm : 2, sign : 1;
+		struct {
+			unsigned value : 30, realm : 1, sign : 1;
 		} rbits;
 	} a;
 
 	a.f = value;
-	a.rbits.value = a.vbits.value >> 2;
+	a.rbits.value = a.vbits.value >> 1;
 	a.rbits.realm = realm;
 
 	return a.f;
@@ -1179,52 +1177,37 @@
 	GISTENTRY *newentry = (GISTENTRY *)PG_GETARG_POINTER(1);
 	float *result = (float *)PG_GETARG_POINTER(2);
 	GIDX *gbox_index_orig, *gbox_index_new;
-	float size_union, size_orig, edge_union, edge_orig;
 
-	POSTGIS_DEBUG(4, "[GIST] 'penalty' function called");
-
 	gbox_index_orig = (GIDX *)DatumGetPointer(origentry->key);
 	gbox_index_new = (GIDX *)DatumGetPointer(newentry->key);
 
+	/* Penalty value of 0 has special code path in Postgres's gistchoose.
+	 * It is used as an early exit condition in subtree loop, allowing faster tree descend.
+	 * For multi-column index, it lets next column break the tie, possibly more confidently.
+	 */
+	*result = 0.0;
+
 	/* Drop out if we're dealing with null inputs. Shouldn't happen. */
-	if (!gbox_index_orig && !gbox_index_new)
+	if (gbox_index_orig && gbox_index_new)
 	{
-		POSTGIS_DEBUG(4, "[GIST] both inputs NULL! returning penalty of zero");
-		*result = 0.0;
-		PG_RETURN_FLOAT8(*result);
-	}
+		/* Calculate the size difference of the boxes (volume difference in this case). */
+		float size_union = gidx_union_volume(gbox_index_orig, gbox_index_new);
+		float size_orig = gidx_volume(gbox_index_orig);
+		float volume_extension = size_union - size_orig;
 
-	/* Calculate the size difference of the boxes (volume difference in this case). */
-	size_union = gidx_union_volume(gbox_index_orig, gbox_index_new);
-	size_orig = gidx_volume(gbox_index_orig);
-	*result = size_union - size_orig;
-
-	/* REALM 0: No extension is required, volume is zero, return edge */
-	/* REALM 1: No extension is required, return nonzero area */
-	/* REALM 2: Area extension is zero, return nonzero edge extension */
-	/* REALM 3: Area extension is nonzero, return it */
-
-	if (*result == 0)
-	{
-		if (size_orig > 0)
-			*result = pack_float(size_orig, 1); /* REALM 1 */
+		/* REALM 1: Area extension is nonzero, return it */
+		if (volume_extension > FLT_EPSILON)
+			*result = pack_float(volume_extension, 1);
 		else
 		{
-			edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new);
-			edge_orig = gidx_edge(gbox_index_orig);
-			*result = edge_union - edge_orig;
-			if (*result == 0)
-				*result = pack_float(edge_orig, 0); /* REALM 0 */
-			else
-				*result = pack_float(*result, 2); /* REALM 2 */
+			/* REALM 0: Area extension is zero, return nonzero edge extension */
+			float edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new);
+			float edge_orig = gidx_edge(gbox_index_orig);
+			float edge_extension = edge_union - edge_orig;
+			if (edge_extension > FLT_EPSILON)
+				*result = pack_float(edge_extension, 0);
 		}
 	}
-	else
-		*result = pack_float(*result, 3); /* REALM 3 */
-
-	POSTGIS_DEBUGF(
-	    4, "[GIST] union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result);
-
 	PG_RETURN_POINTER(result);
 }
 



More information about the postgis-tickets mailing list