[postgis-tickets] r15392 - Gist penalty speed improvement for 2d and nd points
Regina Obe
lr at pcorp.us
Thu May 11 16:44:39 PDT 2017
Author: robe
Date: 2017-05-11 16:44:39 -0700 (Thu, 11 May 2017)
New Revision: 15392
Modified:
trunk/NEWS
trunk/postgis/gserialized_gist_2d.c
trunk/postgis/gserialized_gist_nd.c
Log:
Gist penalty speed improvement for 2d and nd points
References #3753 for PostGIS 2.4.0
Modified: trunk/NEWS
===================================================================
--- trunk/NEWS 2017-05-11 18:17:10 UTC (rev 15391)
+++ trunk/NEWS 2017-05-11 23:44:39 UTC (rev 15392)
@@ -6,6 +6,8 @@
- #3599, Geobuf output support via ST_AsGeobuf (Björn Harrtell)
- #3661, Mapbox vector tile output support via ST_AsMVT (Björn Harrtell / CartoDB)
- #3689, Add orientation checking and forcing functions (Dan Baston)
+ - #3753, Gist penalty speed improvements for 2d and nd points
+ (Darafei Praliaskouski)
* Bug Fixes *
- #3682, Boolean (FTLogical) should be 1 byte not 2 bytes
Modified: trunk/postgis/gserialized_gist_2d.c
===================================================================
--- trunk/postgis/gserialized_gist_2d.c 2017-05-11 18:17:10 UTC (rev 15391)
+++ trunk/postgis/gserialized_gist_2d.c 2017-05-11 23:44:39 UTC (rev 15392)
@@ -19,6 +19,7 @@
**********************************************************************
*
* Copyright 2009 Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2017 Darafei Praliaskouski <me at komzpa.net>
*
**********************************************************************/
@@ -216,6 +217,14 @@
return result;
}
+static float box2df_edge(const BOX2DF *a)
+{
+ if ( a == NULL )
+ return (float)0.0;
+
+ return ((a->xmax) - (a->xmin)) + ((a->ymax) - (a->ymin));
+}
+
static float box2df_union_size(const BOX2DF *a, const BOX2DF *b)
{
float result;
@@ -243,6 +252,32 @@
}
+static float box2df_union_edge(const BOX2DF *a, const BOX2DF *b)
+{
+ float result;
+
+ POSTGIS_DEBUG(5,"entered function");
+
+ if ( a == NULL && b == NULL )
+ {
+ elog(ERROR, "box2df_union_edge received two null arguments");
+ return 0.0;
+ }
+
+ if ( a == NULL )
+ return box2df_edge(b);
+
+ if ( b == NULL )
+ return box2df_edge(a);
+
+ result = (Max(a->xmax,b->xmax) - Min(a->xmin,b->xmin)) +
+ (Max(a->ymax,b->ymax) - Min(a->ymin,b->ymin));
+
+ POSTGIS_DEBUGF(5, "union edge of %s and %s is %.8g", box2df_to_string(a), box2df_to_string(b), result);
+
+ return result;
+}
+
/* Convert a double-based GBOX into a float-based BOX2DF,
ensuring the float box is larger than the double box */
static inline int box2df_from_gbox_p(GBOX *box, BOX2DF *a)
@@ -1243,9 +1278,36 @@
}
/*
+** Function to pack floats of different realms
+** This function serves to pack bit flags inside float type
+** Resulted value represent can be from four different "realms"
+** Every value from realm 3 is greater than any value from realms 2, 1 and 0.
+** Every value from realm 2 is less than every value from realm 3 and greater
+** than any value from realm 1 and 0, and so on. Values from the same realm
+** loose two bits of precision. This technique is possible due to floating
+** point numbers specification according to IEEE 754: exponent bits are highest
+** (excluding sign bits, but here penalty is always positive). If float a is
+** greater than float b, integer A with same bit representation as a is greater
+** than integer B with same bits as b.
+*/
+static float pack_float(const float value, const int realm)
+{
+ union {
+ float f;
+ struct { unsigned value:31, sign:1; } vbits;
+ struct { unsigned value:29, realm:2, sign:1; } rbits;
+ } a;
+
+ a.f = value;
+ a.rbits.value = a.vbits.value >> 2;
+ a.rbits.realm = realm;
+
+ return a.f;
+}
+
+/*
** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry.
** Calculate the change in volume of the old entry once the new entry is added.
-** TODO: Re-evaluate this in light of R*Tree penalty approaches.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_penalty_2d);
Datum gserialized_gist_penalty_2d(PG_FUNCTION_ARGS)
@@ -1254,7 +1316,7 @@
GISTENTRY *newentry = (GISTENTRY*) PG_GETARG_POINTER(1);
float *result = (float*) PG_GETARG_POINTER(2);
BOX2DF *gbox_index_orig, *gbox_index_new;
- float size_union, size_orig;
+ float size_union, size_orig, edge_union, edge_orig;
POSTGIS_DEBUG(4, "[GIST] 'penalty' function called");
@@ -1273,6 +1335,37 @@
size_union = box2df_union_size(gbox_index_orig, gbox_index_new);
size_orig = box2df_size(gbox_index_orig);
*result = size_union - size_orig;
+
+ /* REALM 0: No extension is required, volume is zero, return edge */
+ /* REALM 1: No extension is required, return nonzero area */
+ /* REALM 2: Area extension is zero, return nonzero edge extension */
+ /* REALM 3: Area extension is nonzero, return it */
+
+ if( *result == 0 )
+ {
+ if (size_orig > 0)
+ {
+ *result = pack_float(size_orig, 1); /* REALM 1 */
+ }
+ else
+ {
+ edge_union = box2df_union_edge(gbox_index_orig, gbox_index_new);
+ edge_orig = box2df_edge(gbox_index_orig);
+ *result = edge_union - edge_orig;
+ if( *result == 0 )
+ {
+ *result = pack_float(edge_orig, 0); /* REALM 0 */
+ }
+ else
+ {
+ *result = pack_float(*result, 2); /* REALM 2 */
+ }
+ }
+ }
+ else
+ {
+ *result = pack_float(*result, 3); /* REALM 3 */
+ }
POSTGIS_DEBUGF(4, "[GIST] 'penalty', union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result);
Modified: trunk/postgis/gserialized_gist_nd.c
===================================================================
--- trunk/postgis/gserialized_gist_nd.c 2017-05-11 18:17:10 UTC (rev 15391)
+++ trunk/postgis/gserialized_gist_nd.c 2017-05-11 23:44:39 UTC (rev 15392)
@@ -19,6 +19,7 @@
**********************************************************************
*
* Copyright 2009 Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2017 Darafei Praliaskouski <me at komzpa.net>
*
**********************************************************************/
@@ -229,6 +230,22 @@
return result;
}
+/* Calculate the edge of the GIDX */
+static float gidx_edge(GIDX *a)
+{
+ float result;
+ int i;
+ if ( a == NULL || gidx_is_unknown(a) )
+ {
+ return 0.0;
+ }
+ result = GIDX_GET_MAX(a,0) - GIDX_GET_MIN(a,0);
+ for ( i = 1; i < GIDX_NDIMS(a); i++ )
+ result += (GIDX_GET_MAX(a,i) - GIDX_GET_MIN(a,i));
+ POSTGIS_DEBUGF(5, "calculated edge of %s as %.8g", gidx_to_string(a), result);
+ return result;
+}
+
/* Ensure the first argument has the higher dimensionality. */
static void gidx_dimensionality_check(GIDX **a, GIDX **b)
{
@@ -292,6 +309,58 @@
return result;
}
+/* Calculate the edge of the union of the boxes. Avoids creating an intermediate box. */
+static float gidx_union_edge(GIDX *a, GIDX *b)
+{
+ float result;
+ int i;
+ int ndims_a, ndims_b;
+
+ POSTGIS_DEBUG(5,"entered function");
+
+ if ( a == NULL && b == NULL )
+ {
+ elog(ERROR, "gidx_union_edge received two null arguments");
+ return 0.0;
+ }
+
+ if ( a == NULL || gidx_is_unknown(a) )
+ return gidx_volume(b);
+
+ if ( b == NULL || gidx_is_unknown(b) )
+ return gidx_volume(a);
+
+ if ( gidx_is_unknown(a) && gidx_is_unknown(b) )
+ {
+ return 0.0;
+ }
+
+ /* Ensure 'a' has the most dimensions. */
+ gidx_dimensionality_check(&a, &b);
+
+ ndims_a = GIDX_NDIMS(a);
+ ndims_b = GIDX_NDIMS(b);
+
+ /* Initialize with maximal length of first dimension. */
+ result = Max(GIDX_GET_MAX(a,0),GIDX_GET_MAX(b,0)) - Min(GIDX_GET_MIN(a,0),GIDX_GET_MIN(b,0));
+
+ /* Add maximal length of remaining dimensions. */
+ for ( i = 1; i < ndims_b; i++ )
+ {
+ result += (Max(GIDX_GET_MAX(a,i),GIDX_GET_MAX(b,i)) - Min(GIDX_GET_MIN(a,i),GIDX_GET_MIN(b,i)));
+ }
+
+ /* Add in dimensions of higher dimensional box. */
+ for ( i = ndims_b; i < ndims_a; i++ )
+ {
+ result += (GIDX_GET_MAX(a,i) - GIDX_GET_MIN(a,i));
+ }
+
+ POSTGIS_DEBUGF(5, "edge( %s union %s ) = %.12g", gidx_to_string(a), gidx_to_string(b), result);
+
+ return result;
+}
+
/* Calculate the volume of the intersection of the boxes. */
static float gidx_inter_volume(GIDX *a, GIDX *b)
{
@@ -1238,11 +1307,37 @@
PG_RETURN_BOOL(result);
}
+/*
+** Function to pack floats of different realms
+** This function serves to pack bit flags inside float type
+** Resulted value represent can be from four different "realms"
+** Every value from realm 3 is greater than any value from realms 2, 1 and 0.
+** Every value from realm 2 is less than every value from realm 3 and greater
+** than any value from realm 1 and 0, and so on. Values from the same realm
+** loose two bits of precision. This technique is possible due to floating
+** point numbers specification according to IEEE 754: exponent bits are highest
+** (excluding sign bits, but here penalty is always positive). If float a is
+** greater than float b, integer A with same bit representation as a is greater
+** than integer B with same bits as b.
+*/
+static float pack_float(const float value, const int realm)
+{
+ union {
+ float f;
+ struct { unsigned value:31, sign:1; } vbits;
+ struct { unsigned value:29, realm:2, sign:1; } rbits;
+ } a;
+ a.f = value;
+ a.rbits.value = a.vbits.value >> 2;
+ a.rbits.realm = realm;
+
+ return a.f;
+}
+
/*
** GiST support function. Calculate the "penalty" cost of adding this entry into an existing entry.
** Calculate the change in volume of the old entry once the new entry is added.
-** TODO: Re-evaluate this in light of R*Tree penalty approaches.
*/
PG_FUNCTION_INFO_V1(gserialized_gist_penalty);
Datum gserialized_gist_penalty(PG_FUNCTION_ARGS)
@@ -1251,7 +1346,7 @@
GISTENTRY *newentry = (GISTENTRY*) PG_GETARG_POINTER(1);
float *result = (float*) PG_GETARG_POINTER(2);
GIDX *gbox_index_orig, *gbox_index_new;
- float size_union, size_orig;
+ float size_union, size_orig, edge_union, edge_orig;
POSTGIS_DEBUG(4, "[GIST] 'penalty' function called");
@@ -1271,19 +1366,37 @@
size_orig = gidx_volume(gbox_index_orig);
*result = size_union - size_orig;
- /* All things being equal, we prefer to expand small boxes rather than large boxes.
- NOTE: This code seemed to be causing badly balanced trees to be built
- and therefore has been commented out. Not sure why it didn't work,
- but it didn't.
- if( FP_IS_ZERO(*result) )
- if( FP_IS_ZERO(size_orig) )
- *result = 0.0;
- else
- *result = 1.0 - (1.0/(1.0 + size_orig));
- else
- *result += 1.0;
- */
-
+ /* REALM 0: No extension is required, volume is zero, return edge */
+ /* REALM 1: No extension is required, return nonzero area */
+ /* REALM 2: Area extension is zero, return nonzero edge extension */
+ /* REALM 3: Area extension is nonzero, return it */
+
+ if( *result == 0 )
+ {
+ if (size_orig > 0)
+ {
+ *result = pack_float(size_orig, 1); /* REALM 1 */
+ }
+ else
+ {
+ edge_union = gidx_union_edge(gbox_index_orig, gbox_index_new);
+ edge_orig = gidx_edge(gbox_index_orig);
+ *result = edge_union - edge_orig;
+ if( *result == 0 )
+ {
+ *result = pack_float(edge_orig, 0); /* REALM 0 */
+ }
+ else
+ {
+ *result = pack_float(*result, 2); /* REALM 2 */
+ }
+ }
+ }
+ else
+ {
+ *result = pack_float(*result, 3); /* REALM 3 */
+ }
+
POSTGIS_DEBUGF(4, "[GIST] union size (%.12f), original size (%.12f), penalty (%.12f)", size_union, size_orig, *result);
PG_RETURN_POINTER(result);
More information about the postgis-tickets
mailing list