[postgis-tickets] r17812 - Speed up ST_RemoveRepeatedPoints
Raul
raul at rmr.ninja
Mon Sep 16 05:32:05 PDT 2019
Author: algunenano
Date: 2019-09-16 05:32:05 -0700 (Mon, 16 Sep 2019)
New Revision: 17812
Modified:
trunk/NEWS
trunk/liblwgeom/cunit/cu_algorithm.c
trunk/liblwgeom/liblwgeom.h.in
trunk/liblwgeom/lwgeom.c
trunk/liblwgeom/ptarray.c
trunk/postgis/lwgeom_functions_basic.c
Log:
Speed up ST_RemoveRepeatedPoints
Closes #4491
Closes https://github.com/postgis/postgis/pull/468
Modified: trunk/NEWS
===================================================================
--- trunk/NEWS 2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/NEWS 2019-09-16 12:32:05 UTC (rev 17812)
@@ -15,6 +15,7 @@
- #4504, shp2pgsql -D not working with schema qualified tables
- #4505, Speed up conversion of geometries to/from GEOS (Dan Baston)
- #4507, Use GEOSMakeValid and GEOSBuildArea for GEOS 3.8+ (Dan Baston)
+ - #4491, Speed up ST_RemoveRepeatedPoints (Raúl Marín)
PostGIS 3.0.0alpha4
2019/08/10
Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c 2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/cunit/cu_algorithm.c 2019-09-16 12:32:05 UTC (rev 17812)
@@ -1159,9 +1159,11 @@
{
LWGEOM *g;
char *ewkt;
+ int modified = LW_FALSE;
g = lwgeom_from_wkt("MULTIPOINT(0 0, 10 0, 10 10, 10 10, 0 10, 0 10, 0 10, 0 0, 0 0, 0 0, 5 5, 0 0, 5 5)", LW_PARSER_CHECK_NONE);
- lwgeom_remove_repeated_points_in_place(g, 1);
+ modified = lwgeom_remove_repeated_points_in_place(g, 1);
+ ASSERT_INT_EQUAL(modified, LW_TRUE);
ewkt = lwgeom_to_ewkt(g);
ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5)");
lwgeom_free(g);
@@ -1168,7 +1170,8 @@
lwfree(ewkt);
g = lwgeom_from_wkt("LINESTRING(1612830.15445 4841287.12672,1612830.15824 4841287.12674,1612829.98813 4841274.56198)", LW_PARSER_CHECK_NONE);
- lwgeom_remove_repeated_points_in_place(g, 0.01);
+ modified = lwgeom_remove_repeated_points_in_place(g, 0.01);
+ ASSERT_INT_EQUAL(modified, LW_TRUE);
ewkt = lwgeom_to_ewkt(g);
ASSERT_STRING_EQUAL(ewkt, "LINESTRING(1612830.15445 4841287.12672,1612829.98813 4841274.56198)");
lwgeom_free(g);
@@ -1175,7 +1178,8 @@
lwfree(ewkt);
g = lwgeom_from_wkt("MULTIPOINT(0 0,10 0,10 10,10 10,0 10,0 10,0 10,0 0,0 0,0 0,5 5,5 5,5 8,8 8,8 8,8 8,8 5,8 5,5 5,5 5,5 5,5 5,5 5,50 50,50 50,50 50,50 60,50 60,50 60,60 60,60 50,60 50,50 50,55 55,55 58,58 58,58 55,58 55,55 55)", LW_PARSER_CHECK_NONE);
- lwgeom_remove_repeated_points_in_place(g, 1);
+ modified = lwgeom_remove_repeated_points_in_place(g, 1);
+ ASSERT_INT_EQUAL(modified, LW_TRUE);
ewkt = lwgeom_to_ewkt(g);
ASSERT_STRING_EQUAL(
ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5,5 8,8 8,8 5,50 50,50 60,60 60,60 50,55 55,55 58,58 58,58 55)");
@@ -1183,11 +1187,64 @@
lwfree(ewkt);
g = lwgeom_from_wkt("POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))", LW_PARSER_CHECK_NONE);
- lwgeom_remove_repeated_points_in_place(g, 10000);
+ modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+ ASSERT_INT_EQUAL(modified, LW_TRUE);
ewkt = lwgeom_to_ewkt(g);
ASSERT_STRING_EQUAL(ewkt, "POLYGON((0 0,1 1,1 0,0 0))");
lwgeom_free(g);
lwfree(ewkt);
+
+ // Test the return value (modified or not)
+ g = lwgeom_from_wkt("POINT(0 0)", LW_PARSER_CHECK_NONE);
+ modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+ ASSERT_INT_EQUAL(modified, LW_FALSE);
+ lwgeom_free(g);
+
+ g = lwgeom_from_wkt("TRIANGLE((0 0, 5 0, 3 3, 0 0))", LW_PARSER_CHECK_NONE);
+ modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+ ASSERT_INT_EQUAL(modified, LW_FALSE);
+ lwgeom_free(g);
+
+ g = lwgeom_from_wkt("POLYGON((0 0,0 1,1 1,1 0,0 0))", LW_PARSER_CHECK_NONE);
+ modified = lwgeom_remove_repeated_points_in_place(g, 0.1);
+ ASSERT_INT_EQUAL(modified, LW_FALSE);
+ ewkt = lwgeom_to_ewkt(g);
+ ASSERT_STRING_EQUAL(ewkt, "POLYGON((0 0,0 1,1 1,1 0,0 0))");
+ lwgeom_free(g);
+ lwfree(ewkt);
+
+ g = lwgeom_from_wkt("POLYGON((0 0,0 1,1 1,1 0,0 0), (0.4 0.4, 0.4 0.4, 0.4 0.5, 0.5 0.5, 0.5 0.4, 0.4 0.4))",
+ LW_PARSER_CHECK_NONE);
+ modified = lwgeom_remove_repeated_points_in_place(g, 0.1);
+ ASSERT_INT_EQUAL(modified, LW_TRUE);
+ ewkt = lwgeom_to_ewkt(g);
+ ASSERT_STRING_EQUAL(ewkt, "POLYGON((0 0,0 1,1 1,1 0,0 0),(0.4 0.4,0.5 0.5,0.5 0.4,0.4 0.4))");
+ lwgeom_free(g);
+ lwfree(ewkt);
+
+ g = lwgeom_from_wkt("GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0,1 0,1 1,0 1,0 0)))", LW_PARSER_CHECK_NONE);
+ modified = lwgeom_remove_repeated_points_in_place(g, 0.1);
+ ASSERT_INT_EQUAL(modified, LW_FALSE);
+ ewkt = lwgeom_to_ewkt(g);
+ ASSERT_STRING_EQUAL(ewkt, "GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0,1 0,1 1,0 1,0 0)))");
+ lwgeom_free(g);
+ lwfree(ewkt);
+
+ g = lwgeom_from_wkt("GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)))", LW_PARSER_CHECK_NONE);
+ modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+ ASSERT_INT_EQUAL(modified, LW_TRUE);
+ ewkt = lwgeom_to_ewkt(g);
+ ASSERT_STRING_EQUAL(ewkt, "GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0,1 1,1 0,0 0)))");
+ lwgeom_free(g);
+ lwfree(ewkt);
+
+ g = lwgeom_from_wkt("GEOMETRYCOLLECTION(POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)),POINT(2 0))", LW_PARSER_CHECK_NONE);
+ modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+ ASSERT_INT_EQUAL(modified, LW_TRUE);
+ ewkt = lwgeom_to_ewkt(g);
+ ASSERT_STRING_EQUAL(ewkt, "GEOMETRYCOLLECTION(POLYGON((0 0,1 1,1 0,0 0)),POINT(2 0))");
+ lwgeom_free(g);
+ lwfree(ewkt);
}
static void test_lwgeom_simplify(void)
Modified: trunk/liblwgeom/liblwgeom.h.in
===================================================================
--- trunk/liblwgeom/liblwgeom.h.in 2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/liblwgeom.h.in 2019-09-16 12:32:05 UTC (rev 17812)
@@ -1365,7 +1365,7 @@
extern void lwgeom_simplify_in_place(LWGEOM *igeom, double dist, int preserve_collapsed);
extern void lwgeom_affine(LWGEOM *geom, const AFFINE *affine);
extern void lwgeom_scale(LWGEOM *geom, const POINT4D *factors);
-extern void lwgeom_remove_repeated_points_in_place(LWGEOM *in, double tolerance);
+extern int lwgeom_remove_repeated_points_in_place(LWGEOM *in, double tolerance);
/**
Modified: trunk/liblwgeom/lwgeom.c
===================================================================
--- trunk/liblwgeom/lwgeom.c 2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/lwgeom.c 2019-09-16 12:32:05 UTC (rev 17812)
@@ -1550,20 +1550,23 @@
/**************************************************************/
-void
+int
lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
{
+ int geometry_modified = LW_FALSE;
switch (geom->type)
{
/* No-op! Cannot remote points */
case POINTTYPE:
case TRIANGLETYPE:
- return;
+ return geometry_modified;
case LINETYPE:
{
LWLINE *g = (LWLINE*)(geom);
POINTARRAY *pa = g->points;
+ uint32_t npoints = pa->npoints;
ptarray_remove_repeated_points_in_place(pa, tolerance, 2);
+ geometry_modified = npoints != pa->npoints;
/* Invalid output */
if (pa->npoints == 1 && pa->maxpoints > 1)
{
@@ -1581,13 +1584,17 @@
{
POINTARRAY *pa = g->rings[i];
int minpoints = 4;
+ uint32_t npoints = 0;
/* Skip zero'ed out rings */
if(!pa)
continue;
+ npoints = pa->npoints;
ptarray_remove_repeated_points_in_place(pa, tolerance, minpoints);
+ geometry_modified |= npoints != pa->npoints;
/* Drop collapsed rings */
if(pa->npoints < 4)
{
+ geometry_modified = LW_TRUE;
ptarray_free(pa);
continue;
}
@@ -1608,7 +1615,8 @@
int use_heap = (mpt->ngeoms > out_stack_size);
/* No-op on empty */
- if (mpt->ngeoms == 0) return;
+ if (mpt->ngeoms == 0)
+ return geometry_modified;
/* We cannot write directly back to the multipoint */
/* geoms array because we're reading out of it still */
@@ -1645,14 +1653,15 @@
/* Copy remaining points back into the input */
/* array */
memcpy(mpt->geoms, out, sizeof(LWPOINT *) * n);
+ geometry_modified = mpt->ngeoms != n;
mpt->ngeoms = n;
if (use_heap) lwfree(out);
- return;
+ break;
}
case CIRCSTRINGTYPE:
/* Dunno how to handle these, will return untouched */
- return;
+ return geometry_modified;
/* Can process most multi* types as generic collection */
case MULTILINETYPE:
@@ -1672,7 +1681,7 @@
{
LWGEOM *g = col->geoms[i];
if (!g) continue;
- lwgeom_remove_repeated_points_in_place(g, tolerance);
+ geometry_modified |= lwgeom_remove_repeated_points_in_place(g, tolerance);
/* Drop zero'ed out geometries */
if(lwgeom_is_empty(g))
{
@@ -1691,7 +1700,12 @@
break;
}
}
- return;
+
+ if (geometry_modified)
+ {
+ lwgeom_drop_bbox(geom);
+ }
+ return geometry_modified;
}
Modified: trunk/liblwgeom/ptarray.c
===================================================================
--- trunk/liblwgeom/ptarray.c 2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/ptarray.c 2019-09-16 12:32:05 UTC (rev 17812)
@@ -1464,9 +1464,10 @@
if ( n_points <= min_points ) return;
last = getPoint2d_cp(pa, 0);
+ void *p_to = ((char *)last) + pt_size;
for (i = 1; i < n_points; i++)
{
- int last_point = (i == n_points-1);
+ int last_point = (i == n_points - 1);
/* Look straight into the abyss */
pt = getPoint2d_cp(pa, i);
@@ -1498,11 +1499,14 @@
if (last_point && n_points_out > 1 && tolerance > 0.0 && dsq <= tolsq)
{
n_points_out--;
+ p_to -= pt_size;
}
}
/* Compact all remaining values to front of array */
- ptarray_copy_point(pa, i, n_points_out++);
+ memcpy(p_to, pt, pt_size);
+ n_points_out++;
+ p_to += pt_size;
last = pt;
}
/* Adjust array length */
Modified: trunk/postgis/lwgeom_functions_basic.c
===================================================================
--- trunk/postgis/lwgeom_functions_basic.c 2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/postgis/lwgeom_functions_basic.c 2019-09-16 12:32:05 UTC (rev 17812)
@@ -2748,12 +2748,12 @@
PG_FUNCTION_INFO_V1(ST_RemoveRepeatedPoints);
Datum ST_RemoveRepeatedPoints(PG_FUNCTION_ARGS)
{
- GSERIALIZED *g_in = PG_GETARG_GSERIALIZED_P(0);
- int type = gserialized_get_type(g_in);
+ GSERIALIZED *g_in = PG_GETARG_GSERIALIZED_P_COPY(0);
+ uint32_t type = gserialized_get_type(g_in);
GSERIALIZED *g_out;
LWGEOM *lwgeom_in = NULL;
- LWGEOM *lwgeom_out = NULL;
double tolerance = 0.0;
+ int modified = LW_FALSE;
/* Don't even start to think about points */
if (type == POINTTYPE)
@@ -2763,22 +2763,16 @@
tolerance = PG_GETARG_FLOAT8(1);
lwgeom_in = lwgeom_from_gserialized(g_in);
- lwgeom_out = lwgeom_remove_repeated_points(lwgeom_in, tolerance);
-
- /* COMPUTE_BBOX TAINTING */
- if (lwgeom_in->bbox)
- lwgeom_refresh_bbox(lwgeom_out);
-
- g_out = geometry_serialize(lwgeom_out);
-
- if (lwgeom_out != lwgeom_in)
+ modified = lwgeom_remove_repeated_points_in_place(lwgeom_in, tolerance);
+ if (!modified)
{
- lwgeom_free(lwgeom_out);
+ /* Since there were no changes, we can return the input to avoid the serialization */
+ PG_RETURN_POINTER(g_in);
}
- lwgeom_free(lwgeom_in);
+ g_out = geometry_serialize(lwgeom_in);
- PG_FREE_IF_COPY(g_in, 0);
+ pfree(g_in);
PG_RETURN_POINTER(g_out);
}
More information about the postgis-tickets
mailing list