[postgis-tickets] r17812 - Speed up ST_RemoveRepeatedPoints

Raul raul at rmr.ninja
Mon Sep 16 05:32:05 PDT 2019


Author: algunenano
Date: 2019-09-16 05:32:05 -0700 (Mon, 16 Sep 2019)
New Revision: 17812

Modified:
   trunk/NEWS
   trunk/liblwgeom/cunit/cu_algorithm.c
   trunk/liblwgeom/liblwgeom.h.in
   trunk/liblwgeom/lwgeom.c
   trunk/liblwgeom/ptarray.c
   trunk/postgis/lwgeom_functions_basic.c
Log:
Speed up ST_RemoveRepeatedPoints

Closes #4491
Closes https://github.com/postgis/postgis/pull/468



Modified: trunk/NEWS
===================================================================
--- trunk/NEWS	2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/NEWS	2019-09-16 12:32:05 UTC (rev 17812)
@@ -15,6 +15,7 @@
   - #4504, shp2pgsql -D not working with schema qualified tables
   - #4505, Speed up conversion of geometries to/from GEOS (Dan Baston)
   - #4507, Use GEOSMakeValid and GEOSBuildArea for GEOS 3.8+ (Dan Baston)
+  - #4491, Speed up ST_RemoveRepeatedPoints (Raúl Marín)
 
 PostGIS 3.0.0alpha4
 2019/08/10

Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c	2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/cunit/cu_algorithm.c	2019-09-16 12:32:05 UTC (rev 17812)
@@ -1159,9 +1159,11 @@
 {
 	LWGEOM *g;
 	char *ewkt;
+	int modified = LW_FALSE;
 
 	g = lwgeom_from_wkt("MULTIPOINT(0 0, 10 0, 10 10, 10 10, 0 10, 0 10, 0 10, 0 0, 0 0, 0 0, 5 5, 0 0, 5 5)", LW_PARSER_CHECK_NONE);
-	lwgeom_remove_repeated_points_in_place(g, 1);
+	modified = lwgeom_remove_repeated_points_in_place(g, 1);
+	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
 	ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5)");
 	lwgeom_free(g);
@@ -1168,7 +1170,8 @@
 	lwfree(ewkt);
 
 	g = lwgeom_from_wkt("LINESTRING(1612830.15445 4841287.12672,1612830.15824 4841287.12674,1612829.98813 4841274.56198)", LW_PARSER_CHECK_NONE);
-	lwgeom_remove_repeated_points_in_place(g, 0.01);
+	modified = lwgeom_remove_repeated_points_in_place(g, 0.01);
+	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
 	ASSERT_STRING_EQUAL(ewkt, "LINESTRING(1612830.15445 4841287.12672,1612829.98813 4841274.56198)");
 	lwgeom_free(g);
@@ -1175,7 +1178,8 @@
 	lwfree(ewkt);
 
 	g = lwgeom_from_wkt("MULTIPOINT(0 0,10 0,10 10,10 10,0 10,0 10,0 10,0 0,0 0,0 0,5 5,5 5,5 8,8 8,8 8,8 8,8 5,8 5,5 5,5 5,5 5,5 5,5 5,50 50,50 50,50 50,50 60,50 60,50 60,60 60,60 50,60 50,50 50,55 55,55 58,58 58,58 55,58 55,55 55)", LW_PARSER_CHECK_NONE);
-	lwgeom_remove_repeated_points_in_place(g, 1);
+	modified = lwgeom_remove_repeated_points_in_place(g, 1);
+	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
 	ASSERT_STRING_EQUAL(
 	    ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5,5 8,8 8,8 5,50 50,50 60,60 60,60 50,55 55,55 58,58 58,58 55)");
@@ -1183,11 +1187,64 @@
 	lwfree(ewkt);
 
 	g = lwgeom_from_wkt("POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))", LW_PARSER_CHECK_NONE);
-	lwgeom_remove_repeated_points_in_place(g, 10000);
+	modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
 	ASSERT_STRING_EQUAL(ewkt, "POLYGON((0 0,1 1,1 0,0 0))");
 	lwgeom_free(g);
 	lwfree(ewkt);
+
+	// Test the return value (modified or not)
+	g = lwgeom_from_wkt("POINT(0 0)", LW_PARSER_CHECK_NONE);
+	modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+	ASSERT_INT_EQUAL(modified, LW_FALSE);
+	lwgeom_free(g);
+
+	g = lwgeom_from_wkt("TRIANGLE((0 0, 5 0, 3 3, 0 0))", LW_PARSER_CHECK_NONE);
+	modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+	ASSERT_INT_EQUAL(modified, LW_FALSE);
+	lwgeom_free(g);
+
+	g = lwgeom_from_wkt("POLYGON((0 0,0 1,1 1,1 0,0 0))", LW_PARSER_CHECK_NONE);
+	modified = lwgeom_remove_repeated_points_in_place(g, 0.1);
+	ASSERT_INT_EQUAL(modified, LW_FALSE);
+	ewkt = lwgeom_to_ewkt(g);
+	ASSERT_STRING_EQUAL(ewkt, "POLYGON((0 0,0 1,1 1,1 0,0 0))");
+	lwgeom_free(g);
+	lwfree(ewkt);
+
+	g = lwgeom_from_wkt("POLYGON((0 0,0 1,1 1,1 0,0 0), (0.4 0.4, 0.4 0.4, 0.4 0.5, 0.5 0.5, 0.5 0.4, 0.4 0.4))",
+			    LW_PARSER_CHECK_NONE);
+	modified = lwgeom_remove_repeated_points_in_place(g, 0.1);
+	ASSERT_INT_EQUAL(modified, LW_TRUE);
+	ewkt = lwgeom_to_ewkt(g);
+	ASSERT_STRING_EQUAL(ewkt, "POLYGON((0 0,0 1,1 1,1 0,0 0),(0.4 0.4,0.5 0.5,0.5 0.4,0.4 0.4))");
+	lwgeom_free(g);
+	lwfree(ewkt);
+
+	g = lwgeom_from_wkt("GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0,1 0,1 1,0 1,0 0)))", LW_PARSER_CHECK_NONE);
+	modified = lwgeom_remove_repeated_points_in_place(g, 0.1);
+	ASSERT_INT_EQUAL(modified, LW_FALSE);
+	ewkt = lwgeom_to_ewkt(g);
+	ASSERT_STRING_EQUAL(ewkt, "GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0,1 0,1 1,0 1,0 0)))");
+	lwgeom_free(g);
+	lwfree(ewkt);
+
+	g = lwgeom_from_wkt("GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)))", LW_PARSER_CHECK_NONE);
+	modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+	ASSERT_INT_EQUAL(modified, LW_TRUE);
+	ewkt = lwgeom_to_ewkt(g);
+	ASSERT_STRING_EQUAL(ewkt, "GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0,1 1,1 0,0 0)))");
+	lwgeom_free(g);
+	lwfree(ewkt);
+
+	g = lwgeom_from_wkt("GEOMETRYCOLLECTION(POLYGON((0 0, 0 1, 1 1, 1 0, 0 0)),POINT(2 0))", LW_PARSER_CHECK_NONE);
+	modified = lwgeom_remove_repeated_points_in_place(g, 10000);
+	ASSERT_INT_EQUAL(modified, LW_TRUE);
+	ewkt = lwgeom_to_ewkt(g);
+	ASSERT_STRING_EQUAL(ewkt, "GEOMETRYCOLLECTION(POLYGON((0 0,1 1,1 0,0 0)),POINT(2 0))");
+	lwgeom_free(g);
+	lwfree(ewkt);
 }
 
 static void test_lwgeom_simplify(void)

Modified: trunk/liblwgeom/liblwgeom.h.in
===================================================================
--- trunk/liblwgeom/liblwgeom.h.in	2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/liblwgeom.h.in	2019-09-16 12:32:05 UTC (rev 17812)
@@ -1365,7 +1365,7 @@
 extern void lwgeom_simplify_in_place(LWGEOM *igeom, double dist, int preserve_collapsed);
 extern void lwgeom_affine(LWGEOM *geom, const AFFINE *affine);
 extern void lwgeom_scale(LWGEOM *geom, const POINT4D *factors);
-extern void lwgeom_remove_repeated_points_in_place(LWGEOM *in, double tolerance);
+extern int lwgeom_remove_repeated_points_in_place(LWGEOM *in, double tolerance);
 
 
 /**

Modified: trunk/liblwgeom/lwgeom.c
===================================================================
--- trunk/liblwgeom/lwgeom.c	2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/lwgeom.c	2019-09-16 12:32:05 UTC (rev 17812)
@@ -1550,20 +1550,23 @@
 /**************************************************************/
 
 
-void
+int
 lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
 {
+	int geometry_modified = LW_FALSE;
 	switch (geom->type)
 	{
 		/* No-op! Cannot remote points */
 		case POINTTYPE:
 		case TRIANGLETYPE:
-			return;
+			return geometry_modified;
 		case LINETYPE:
 		{
 			LWLINE *g = (LWLINE*)(geom);
 			POINTARRAY *pa = g->points;
+			uint32_t npoints = pa->npoints;
 			ptarray_remove_repeated_points_in_place(pa, tolerance, 2);
+			geometry_modified = npoints != pa->npoints;
 			/* Invalid output */
 			if (pa->npoints == 1 && pa->maxpoints > 1)
 			{
@@ -1581,13 +1584,17 @@
 			{
 				POINTARRAY *pa = g->rings[i];
 				int minpoints = 4;
+				uint32_t npoints = 0;
 				/* Skip zero'ed out rings */
 				if(!pa)
 					continue;
+				npoints = pa->npoints;
 				ptarray_remove_repeated_points_in_place(pa, tolerance, minpoints);
+				geometry_modified |= npoints != pa->npoints;
 				/* Drop collapsed rings */
 				if(pa->npoints < 4)
 				{
+					geometry_modified = LW_TRUE;
 					ptarray_free(pa);
 					continue;
 				}
@@ -1608,7 +1615,8 @@
 			int use_heap = (mpt->ngeoms > out_stack_size);
 
 			/* No-op on empty */
-			if (mpt->ngeoms == 0) return;
+			if (mpt->ngeoms == 0)
+				return geometry_modified;
 
 			/* We cannot write directly back to the multipoint */
 			/* geoms array because we're reading out of it still */
@@ -1645,14 +1653,15 @@
 			/* Copy remaining points back into the input */
 			/* array */
 			memcpy(mpt->geoms, out, sizeof(LWPOINT *) * n);
+			geometry_modified = mpt->ngeoms != n;
 			mpt->ngeoms = n;
 			if (use_heap) lwfree(out);
-			return;
+			break;
 		}
 
 		case CIRCSTRINGTYPE:
 			/* Dunno how to handle these, will return untouched */
-			return;
+			return geometry_modified;
 
 		/* Can process most multi* types as generic collection */
 		case MULTILINETYPE:
@@ -1672,7 +1681,7 @@
 			{
 				LWGEOM *g = col->geoms[i];
 				if (!g) continue;
-				lwgeom_remove_repeated_points_in_place(g, tolerance);
+				geometry_modified |= lwgeom_remove_repeated_points_in_place(g, tolerance);
 				/* Drop zero'ed out geometries */
 				if(lwgeom_is_empty(g))
 				{
@@ -1691,7 +1700,12 @@
 			break;
 		}
 	}
-	return;
+
+	if (geometry_modified)
+	{
+		lwgeom_drop_bbox(geom);
+	}
+	return geometry_modified;
 }
 
 

Modified: trunk/liblwgeom/ptarray.c
===================================================================
--- trunk/liblwgeom/ptarray.c	2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/liblwgeom/ptarray.c	2019-09-16 12:32:05 UTC (rev 17812)
@@ -1464,9 +1464,10 @@
 	if ( n_points <= min_points ) return;
 
 	last = getPoint2d_cp(pa, 0);
+	void *p_to = ((char *)last) + pt_size;
 	for (i = 1; i < n_points; i++)
 	{
-		int last_point = (i == n_points-1);
+		int last_point = (i == n_points - 1);
 
 		/* Look straight into the abyss */
 		pt = getPoint2d_cp(pa, i);
@@ -1498,11 +1499,14 @@
 			if (last_point && n_points_out > 1 && tolerance > 0.0 && dsq <= tolsq)
 			{
 				n_points_out--;
+				p_to -= pt_size;
 			}
 		}
 
 		/* Compact all remaining values to front of array */
-		ptarray_copy_point(pa, i, n_points_out++);
+		memcpy(p_to, pt, pt_size);
+		n_points_out++;
+		p_to += pt_size;
 		last = pt;
 	}
 	/* Adjust array length */

Modified: trunk/postgis/lwgeom_functions_basic.c
===================================================================
--- trunk/postgis/lwgeom_functions_basic.c	2019-09-14 19:09:20 UTC (rev 17811)
+++ trunk/postgis/lwgeom_functions_basic.c	2019-09-16 12:32:05 UTC (rev 17812)
@@ -2748,12 +2748,12 @@
 PG_FUNCTION_INFO_V1(ST_RemoveRepeatedPoints);
 Datum ST_RemoveRepeatedPoints(PG_FUNCTION_ARGS)
 {
-	GSERIALIZED *g_in = PG_GETARG_GSERIALIZED_P(0);
-	int type = gserialized_get_type(g_in);
+	GSERIALIZED *g_in = PG_GETARG_GSERIALIZED_P_COPY(0);
+	uint32_t type = gserialized_get_type(g_in);
 	GSERIALIZED *g_out;
 	LWGEOM *lwgeom_in = NULL;
-	LWGEOM *lwgeom_out = NULL;
 	double tolerance = 0.0;
+	int modified = LW_FALSE;
 
 	/* Don't even start to think about points */
 	if (type == POINTTYPE)
@@ -2763,22 +2763,16 @@
 		tolerance = PG_GETARG_FLOAT8(1);
 
 	lwgeom_in = lwgeom_from_gserialized(g_in);
-	lwgeom_out = lwgeom_remove_repeated_points(lwgeom_in, tolerance);
-
-	/* COMPUTE_BBOX TAINTING */
-	if (lwgeom_in->bbox)
-		lwgeom_refresh_bbox(lwgeom_out);
-
-	g_out = geometry_serialize(lwgeom_out);
-
-	if (lwgeom_out != lwgeom_in)
+	modified = lwgeom_remove_repeated_points_in_place(lwgeom_in, tolerance);
+	if (!modified)
 	{
-		lwgeom_free(lwgeom_out);
+		/* Since there were no changes, we can return the input to avoid the serialization */
+		PG_RETURN_POINTER(g_in);
 	}
 
-	lwgeom_free(lwgeom_in);
+	g_out = geometry_serialize(lwgeom_in);
 
-	PG_FREE_IF_COPY(g_in, 0);
+	pfree(g_in);
 	PG_RETURN_POINTER(g_out);
 }
 



More information about the postgis-tickets mailing list