[postgis-tickets] r15283 - #3418, KNN recheck in 9.5 fails with index returned tuples in wrong order

Paul Ramsey pramsey at cleverelephant.ca
Fri Jan 6 08:22:38 PST 2017


Author: pramsey
Date: 2017-01-06 08:22:38 -0800 (Fri, 06 Jan 2017)
New Revision: 15283

Modified:
   trunk/postgis/gserialized_gist_2d.c
Log:
#3418, KNN recheck in 9.5 fails with index returned tuples in wrong order 

While we store float boxes, it is important to carry out *comparisons*
of those boxes in double space, so we can capture small differences. 
This works well because the boxes are "overdetermined", they are slightly
larger than they need to be. The bug in this case was caused by a case
where the distance calculation between two boxes was carried out in float
space and as a result the distance got slightly overdetermined, ending
up larger than the actual distance between the objects themselves.



Modified: trunk/postgis/gserialized_gist_2d.c
===================================================================
--- trunk/postgis/gserialized_gist_2d.c	2017-01-06 10:31:17 UTC (rev 15282)
+++ trunk/postgis/gserialized_gist_2d.c	2017-01-06 16:22:38 UTC (rev 15283)
@@ -396,7 +396,6 @@
 
 /**
 * Calculate the centroid->centroid distance between the boxes.
-* We return the square distance to avoid a call to sqrt.
 */
 static double box2df_distance_leaf_centroid(const BOX2DF *a, const BOX2DF *b)
 {
@@ -407,7 +406,6 @@
     double b_y = (b->ymax + b->ymin) / 2.0;
 
     /* This "distance" is only used for comparisons, */
-    /* so for speed we drop contants and skip the sqrt step. */
     return sqrt((a_x - b_x) * (a_x - b_x) + (a_y - b_y) * (a_y - b_y));
 }
 
@@ -499,27 +497,27 @@
 */
 static double box2df_distance(const BOX2DF *a, const BOX2DF *b)
 {
-    /* Check for overlap */
-    if ( box2df_overlaps(a, b) )
-        return 0.0;
+	/* Check for overlap */
+	if ( box2df_overlaps(a, b) )
+		return 0.0;
 
-    if ( box2df_left(a, b) )
-    {
-        if ( box2df_above(a, b) )
+	if ( box2df_left(a, b) )
+	{
+		if ( box2df_above(a, b) )
 			return pt_distance(a->xmax, a->ymin, b->xmin, b->ymax);
 		if ( box2df_below(a, b) )
 			return pt_distance(a->xmax, a->ymax, b->xmin, b->ymin);
 		else
-			return b->xmin - a->xmax;
+			return (double)b->xmin - (double)a->xmax;
 	}
 	if ( box2df_right(a, b) )
 	{
-        if ( box2df_above(a, b) )
+		if ( box2df_above(a, b) )
 			return pt_distance(a->xmin, a->ymin, b->xmax, b->ymax);
 		if ( box2df_below(a, b) )
 			return pt_distance(a->xmin, a->ymax, b->xmax, b->ymin);
 		else
-			return a->xmin - b->xmax;
+			return (double)a->xmin - (double)b->xmax;
 	}
 	if ( box2df_above(a, b) )
 	{
@@ -528,7 +526,7 @@
 		if ( box2df_right(a, b) )
 			return pt_distance(a->xmin, a->ymin, b->xmax, b->ymax);
 		else
-			return a->ymin - b->ymax;
+			return (double)a->ymin - (double)b->ymax;
 	}
 	if ( box2df_below(a, b) )
 	{
@@ -537,9 +535,9 @@
 		if ( box2df_right(a, b) )
 			return pt_distance(a->xmin, a->ymax, b->xmax, b->ymin);
 		else
-			return b->ymin - a->ymax;
+			return (double)b->ymin - (double)a->ymax;
 	}
-	
+
 	return FLT_MAX;
 }
 
@@ -1202,7 +1200,7 @@
 	/* Box-style distance test */
 	if ( strategy == 14 ) /* operator <#> */
 	{
-		distance = (double)box2df_distance(entry_box, &query_box);
+		distance = box2df_distance(entry_box, &query_box);
 	}
 	/* True distance test (formerly centroid distance) */
 	else if ( strategy == 13 ) /* operator <-> */
@@ -1210,7 +1208,7 @@
 		/* In all cases, since we only have keys (boxes) we'll return */
 		/* the minimum possible distance, which is the box2df_distance */
 		/* and let the recheck sort things out in the case of leaves */
-		distance = (double)box2df_distance(entry_box, &query_box);
+		distance = box2df_distance(entry_box, &query_box);
 
 		if (GIST_LEAF(entry))
 			*recheck = true;



More information about the postgis-tickets mailing list