[postgis-tickets] r15923 - Performance improvements for b-tree geometry sorts
Paul Ramsey
pramsey at cleverelephant.ca
Fri Oct 6 06:31:48 PDT 2017
Author: pramsey
Date: 2017-10-06 06:31:48 -0700 (Fri, 06 Oct 2017)
New Revision: 15923
Modified:
branches/2.4/NEWS
branches/2.4/liblwgeom/g_serialized.c
branches/2.4/postgis/geography_btree.c
Log:
Performance improvements for b-tree geometry sorts
References #3864
Modified: branches/2.4/NEWS
===================================================================
--- branches/2.4/NEWS 2017-10-06 13:29:01 UTC (rev 15922)
+++ branches/2.4/NEWS 2017-10-06 13:31:48 UTC (rev 15923)
@@ -10,6 +10,7 @@
- #3878, Single defn of signum in header
- #3880, Undefined behaviour in TYPMOD_GET_SRID
- #3875, Fix undefined behaviour in shift operation
+ - #3864, Performance improvements for b-tree geometry sorts
PostGIS 2.4.0
Modified: branches/2.4/liblwgeom/g_serialized.c
===================================================================
--- branches/2.4/liblwgeom/g_serialized.c 2017-10-06 13:29:01 UTC (rev 15922)
+++ branches/2.4/liblwgeom/g_serialized.c 2017-10-06 13:31:48 UTC (rev 15923)
@@ -19,6 +19,7 @@
**********************************************************************
*
* Copyright 2009 Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2017 Darafei Praliaskouski <me at komzpa.net>
*
**********************************************************************/
@@ -109,7 +110,7 @@
if ( srid == 0 )
return SRID_UNKNOWN;
else
- return clamp_srid(srid);
+ return srid;
}
void gserialized_set_srid(GSERIALIZED *s, int32_t srid)
@@ -128,6 +129,15 @@
s->srid[2] = (srid & 0x000000FF);
}
+inline static int gserialized_cmp_srid(const GSERIALIZED *s1, const GSERIALIZED *s2)
+{
+ return (
+ s1->srid[0] == s2->srid[0] &&
+ s1->srid[1] == s2->srid[1] &&
+ s1->srid[2] == s2->srid[2]
+ ) ? 0 : 1;
+}
+
GSERIALIZED* gserialized_copy(const GSERIALIZED *g)
{
GSERIALIZED *g_out = NULL;
@@ -240,15 +250,15 @@
}
#endif
+union floatuint {
+ uint32_t u;
+ float f;
+};
uint64_t gbox_get_sortable_hash(const GBOX *g)
{
+ union floatuint x, y;
- union floatuint {
- uint32_t u;
- float f;
- } x, y;
-
/*
* Since in theory the bitwise representation of an IEEE
* float is sortable (exponents come before mantissa, etc)
@@ -284,8 +294,43 @@
{
int g1_is_empty, g2_is_empty, cmp;
GBOX box1, box2;
+ uint64_t hash1, hash2;
size_t sz1 = SIZE_GET(g1->size);
size_t sz2 = SIZE_GET(g2->size);
+ union floatuint x, y;
+
+ /*
+ * For two non-same planar points, we can skip a lot of machinery.
+ */
+ if (
+ sz1 > 16 && // 16 is size of EMPTY, if it's larger - it has coordinates
+ sz2 > 16 &&
+ *(uint32_t*)(g1->data) == POINTTYPE &&
+ *(uint32_t*)(g2->data) == POINTTYPE &&
+ !FLAGS_GET_BBOX(g1->flags) &&
+ !FLAGS_GET_GEODETIC(g1->flags) &&
+ !FLAGS_GET_BBOX(g2->flags) &&
+ !FLAGS_GET_GEODETIC(g2->flags)
+ )
+ {
+ double *dptr = (double*)(g1->data);
+ x.f = 2.0 * dptr[1];
+ y.f = 2.0 * dptr[2];
+ hash1 = uint32_interleave_2(x.u, y.u);
+
+ dptr = (double*)(g2->data);
+ x.f = 2.0 * dptr[1];
+ y.f = 2.0 * dptr[2];
+ hash2 = uint32_interleave_2(x.u, y.u);
+
+ if ( hash1 > hash2 )
+ return 1;
+ if ( hash1 < hash2 )
+ return -1;
+
+ // if hashes happen to be the same, go to full compare.
+ }
+
size_t hsz1 = gserialized_header_size(g1);
size_t hsz2 = gserialized_header_size(g2);
@@ -295,9 +340,7 @@
size_t bsz2 = sz2 - hsz2;
size_t bsz = bsz1 < bsz2 ? bsz1 : bsz2;
- uint64_t hash1, hash2;
- int32_t srid1 = gserialized_get_srid(g1);
- int32_t srid2 = gserialized_get_srid(g2);
+ int cmp_srid = gserialized_cmp_srid(g1, g2);
g1_is_empty = (gserialized_get_gbox_p(g1, &box1) == LW_FAILURE);
g2_is_empty = (gserialized_get_gbox_p(g2, &box2) == LW_FAILURE);
@@ -322,7 +365,7 @@
/* Return equality for perfect equality only */
cmp = memcmp(b1, b2, bsz);
- if ( bsz1 == bsz2 && srid1 == srid2 && cmp == 0 )
+ if ( bsz1 == bsz2 && cmp_srid == 0 && cmp == 0 )
return 0;
/* Using the centroids, calculate somewhat sortable */
Modified: branches/2.4/postgis/geography_btree.c
===================================================================
--- branches/2.4/postgis/geography_btree.c 2017-10-06 13:29:01 UTC (rev 15922)
+++ branches/2.4/postgis/geography_btree.c 2017-10-06 13:31:48 UTC (rev 15923)
@@ -41,20 +41,7 @@
Datum geography_gt(PG_FUNCTION_ARGS);
Datum geography_cmp(PG_FUNCTION_ARGS);
-
/*
-** Utility function to return the center point of a
-** geocentric bounding box. We don't divide by two
-** because we're only using the values for comparison.
-*/
-static void geography_gidx_center(const GIDX *gidx, POINT3D *p)
-{
- p->x = GIDX_GET_MIN(gidx, 0) + GIDX_GET_MAX(gidx, 0);
- p->y = GIDX_GET_MIN(gidx, 1) + GIDX_GET_MAX(gidx, 1);
- p->z = GIDX_GET_MIN(gidx, 2) + GIDX_GET_MAX(gidx, 2);
-}
-
-/*
** BTree support function. Based on two geographies return true if
** they are "less than" and false otherwise.
*/
More information about the postgis-tickets
mailing list