[postgis-tickets] r15923 - Performance improvements for b-tree geometry sorts

Paul Ramsey pramsey at cleverelephant.ca
Fri Oct 6 06:31:48 PDT 2017


Author: pramsey
Date: 2017-10-06 06:31:48 -0700 (Fri, 06 Oct 2017)
New Revision: 15923

Modified:
   branches/2.4/NEWS
   branches/2.4/liblwgeom/g_serialized.c
   branches/2.4/postgis/geography_btree.c
Log:
Performance improvements for b-tree geometry sorts
References #3864


Modified: branches/2.4/NEWS
===================================================================
--- branches/2.4/NEWS	2017-10-06 13:29:01 UTC (rev 15922)
+++ branches/2.4/NEWS	2017-10-06 13:31:48 UTC (rev 15923)
@@ -10,6 +10,7 @@
   - #3878, Single defn of signum in header
   - #3880, Undefined behaviour in TYPMOD_GET_SRID
   - #3875, Fix undefined behaviour in shift operation
+  - #3864, Performance improvements for b-tree geometry sorts
 
 
 PostGIS 2.4.0

Modified: branches/2.4/liblwgeom/g_serialized.c
===================================================================
--- branches/2.4/liblwgeom/g_serialized.c	2017-10-06 13:29:01 UTC (rev 15922)
+++ branches/2.4/liblwgeom/g_serialized.c	2017-10-06 13:31:48 UTC (rev 15923)
@@ -19,6 +19,7 @@
  **********************************************************************
  *
  * Copyright 2009 Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2017 Darafei Praliaskouski <me at komzpa.net>
  *
  **********************************************************************/
 
@@ -109,7 +110,7 @@
 	if ( srid == 0 )
 		return SRID_UNKNOWN;
 	else
-		return clamp_srid(srid);
+		return srid;
 }
 
 void gserialized_set_srid(GSERIALIZED *s, int32_t srid)
@@ -128,6 +129,15 @@
 	s->srid[2] = (srid & 0x000000FF);
 }
 
+inline static int gserialized_cmp_srid(const GSERIALIZED *s1, const GSERIALIZED *s2)
+{
+	return (
+		s1->srid[0] == s2->srid[0] &&
+		s1->srid[1] == s2->srid[1] &&
+		s1->srid[2] == s2->srid[2]
+	) ? 0 : 1;
+}
+
 GSERIALIZED* gserialized_copy(const GSERIALIZED *g)
 {
 	GSERIALIZED *g_out = NULL;
@@ -240,15 +250,15 @@
 }
 #endif
 
+union floatuint {
+	uint32_t u;
+	float f;
+};
 
 uint64_t gbox_get_sortable_hash(const GBOX *g)
 {
+	union floatuint x, y;
 
-	union floatuint {
-		uint32_t u;
-		float f;
-	} x, y;
-
 	/*
 	* Since in theory the bitwise representation of an IEEE
 	* float is sortable (exponents come before mantissa, etc)
@@ -284,8 +294,43 @@
 {
 	int g1_is_empty, g2_is_empty, cmp;
 	GBOX box1, box2;
+	uint64_t hash1, hash2;
 	size_t sz1 = SIZE_GET(g1->size);
 	size_t sz2 = SIZE_GET(g2->size);
+	union floatuint x, y;
+
+	/*
+	* For two non-same planar points, we can skip a lot of machinery.
+	*/
+	if (
+		sz1 > 16 && // 16 is size of EMPTY, if it's larger - it has coordinates
+		sz2 > 16 &&
+		*(uint32_t*)(g1->data) == POINTTYPE &&
+		*(uint32_t*)(g2->data) == POINTTYPE &&
+		!FLAGS_GET_BBOX(g1->flags) &&
+		!FLAGS_GET_GEODETIC(g1->flags) &&
+		!FLAGS_GET_BBOX(g2->flags) &&
+		!FLAGS_GET_GEODETIC(g2->flags)
+	)
+	{
+		double *dptr = (double*)(g1->data);
+		x.f = 2.0 * dptr[1];
+		y.f = 2.0 * dptr[2];
+		hash1 = uint32_interleave_2(x.u, y.u);
+
+		dptr = (double*)(g2->data);
+		x.f = 2.0 * dptr[1];
+		y.f = 2.0 * dptr[2];
+		hash2 = uint32_interleave_2(x.u, y.u);
+
+		if ( hash1 > hash2 )
+			return 1;
+		if ( hash1 < hash2 )
+			return -1;
+
+		// if hashes happen to be the same, go to full compare.
+	}
+
 	size_t hsz1 = gserialized_header_size(g1);
 	size_t hsz2 = gserialized_header_size(g2);
 
@@ -295,9 +340,7 @@
 	size_t bsz2 = sz2 - hsz2;
 	size_t bsz = bsz1 < bsz2 ? bsz1 : bsz2;
 
-	uint64_t hash1, hash2;
-	int32_t srid1 = gserialized_get_srid(g1);
-	int32_t srid2 = gserialized_get_srid(g2);
+	int cmp_srid = gserialized_cmp_srid(g1, g2);
 
 	g1_is_empty = (gserialized_get_gbox_p(g1, &box1) == LW_FAILURE);
 	g2_is_empty = (gserialized_get_gbox_p(g2, &box2) == LW_FAILURE);
@@ -322,7 +365,7 @@
 
 	/* Return equality for perfect equality only */
 	cmp = memcmp(b1, b2, bsz);
-	if ( bsz1 == bsz2 && srid1 == srid2 && cmp == 0 )
+	if ( bsz1 == bsz2 && cmp_srid == 0 && cmp == 0 )
 		return 0;
 
 	/* Using the centroids, calculate somewhat sortable */

Modified: branches/2.4/postgis/geography_btree.c
===================================================================
--- branches/2.4/postgis/geography_btree.c	2017-10-06 13:29:01 UTC (rev 15922)
+++ branches/2.4/postgis/geography_btree.c	2017-10-06 13:31:48 UTC (rev 15923)
@@ -41,20 +41,7 @@
 Datum geography_gt(PG_FUNCTION_ARGS);
 Datum geography_cmp(PG_FUNCTION_ARGS);
 
-
 /*
-** Utility function to return the center point of a
-** geocentric bounding box. We don't divide by two
-** because we're only using the values for comparison.
-*/
-static void geography_gidx_center(const GIDX *gidx, POINT3D *p)
-{
-	p->x = GIDX_GET_MIN(gidx, 0) + GIDX_GET_MAX(gidx, 0);
-	p->y = GIDX_GET_MIN(gidx, 1) + GIDX_GET_MAX(gidx, 1);
-	p->z = GIDX_GET_MIN(gidx, 2) + GIDX_GET_MAX(gidx, 2);
-}
-
-/*
 ** BTree support function. Based on two geographies return true if
 ** they are "less than" and false otherwise.
 */



More information about the postgis-tickets mailing list