[postgis-tickets] r15943 - Ptarray in place repeated point removal returns, passing all tests, even in topology. (References #3877)

Paul Ramsey pramsey at cleverelephant.ca
Tue Oct 10 09:27:08 PDT 2017


Author: pramsey
Date: 2017-10-10 09:27:08 -0700 (Tue, 10 Oct 2017)
New Revision: 15943

Modified:
   trunk/liblwgeom/cunit/cu_algorithm.c
   trunk/liblwgeom/cunit/cu_geos.c
   trunk/liblwgeom/liblwgeom_internal.h
   trunk/liblwgeom/lwcollection.c
   trunk/liblwgeom/lwgeom.c
   trunk/liblwgeom/lwline.c
   trunk/liblwgeom/lwmpoint.c
   trunk/liblwgeom/lwpoly.c
   trunk/liblwgeom/ptarray.c
Log:
Ptarray in place repeated point removal returns, passing all tests, even in topology. (References #3877)


Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/cunit/cu_algorithm.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -1003,6 +1003,39 @@
 	CU_ASSERT_EQUAL(gh, rs);
 }
 
+static void test_lwgeom_remove_repeated_points(void)
+{
+	LWGEOM *g;
+	char *ewkt;
+
+	g = lwgeom_from_wkt("MULTIPOINT(0 0, 10 0, 10 10, 10 10, 0 10, 0 10, 0 10, 0 0, 0 0, 0 0, 5 5, 0 0, 5 5)", LW_PARSER_CHECK_NONE);
+	lwgeom_remove_repeated_points_in_place(g, 1);
+	ewkt = lwgeom_to_ewkt(g);
+	CU_ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5)");
+	// printf("%s\n", ewkt);
+	lwgeom_free(g);
+	lwfree(ewkt);
+
+	g = lwgeom_from_wkt("LINESTRING(1612830.15445 4841287.12672,1612830.15824 4841287.12674,1612829.98813 4841274.56198)", LW_PARSER_CHECK_NONE);
+	lwgeom_remove_repeated_points_in_place(g, 0.01);
+	ewkt = lwgeom_to_ewkt(g);
+	CU_ASSERT_STRING_EQUAL(ewkt, "LINESTRING(1612830.15445 4841287.12672,1612829.98813 4841274.56198)");
+	// printf("%s\n", ewkt);
+	lwgeom_free(g);
+	lwfree(ewkt);
+
+
+	g = lwgeom_from_wkt("MULTIPOINT(0 0,10 0,10 10,10 10,0 10,0 10,0 10,0 0,0 0,0 0,5 5,5 5,5 8,8 8,8 8,8 8,8 5,8 5,5 5,5 5,5 5,5 5,5 5,50 50,50 50,50 50,50 60,50 60,50 60,60 60,60 50,60 50,50 50,55 55,55 58,58 58,58 55,58 55,55 55)", LW_PARSER_CHECK_NONE);
+	lwgeom_remove_repeated_points_in_place(g, 1);
+	ewkt = lwgeom_to_ewkt(g);
+	CU_ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5,5 8,8 8,8 5,50 50,50 60,60 60,60 50,55 55,55 58,58 58,58 55)");
+	// printf("%s\n", ewkt);
+	lwgeom_free(g);
+	lwfree(ewkt);
+
+
+}
+
 static void test_lwgeom_simplify(void)
 {
 	LWGEOM *l;
@@ -1292,4 +1325,5 @@
 	PG_ADD_TEST(suite,test_median_handles_3d_correctly);
 	PG_ADD_TEST(suite,test_median_robustness);
 	PG_ADD_TEST(suite,test_lwpoly_construct_circle);
+	PG_ADD_TEST(suite,test_lwgeom_remove_repeated_points);
 }

Modified: trunk/liblwgeom/cunit/cu_geos.c
===================================================================
--- trunk/liblwgeom/cunit/cu_geos.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/cunit/cu_geos.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -51,7 +51,7 @@
 		geom_in = lwgeom_from_wkt(in_ewkt, LW_PARSER_CHECK_NONE);
 		geom_out = lwgeom_geos_noop(geom_in);
 		if ( ! geom_out ) {
-			fprintf(stderr, "\nNull return from lwgeom_geos_noop with wkt:   %s\n", in_ewkt);
+			// fprintf(stderr, "\nNull return from lwgeom_geos_noop with wkt:   %s\n", in_ewkt);
 			lwgeom_free(geom_in);
 			continue;
 		}

Modified: trunk/liblwgeom/liblwgeom_internal.h
===================================================================
--- trunk/liblwgeom/liblwgeom_internal.h	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/liblwgeom_internal.h	2017-10-10 16:27:08 UTC (rev 15943)
@@ -398,12 +398,10 @@
 /*
 * Repeated points
 */
-POINTARRAY *ptarray_remove_repeated_points_minpoints(const POINTARRAY *in, double tolerance, int minpoints);
 POINTARRAY *ptarray_remove_repeated_points(const POINTARRAY *in, double tolerance);
-LWGEOM* lwmpoint_remove_repeated_points(const LWMPOINT *in, double tolerance);
 LWGEOM* lwline_remove_repeated_points(const LWLINE *in, double tolerance);
-LWGEOM* lwcollection_remove_repeated_points(const LWCOLLECTION *in, double tolerance);
-LWGEOM* lwpoly_remove_repeated_points(const LWPOLY *in, double tolerance);
+void ptarray_remove_repeated_points_in_place(POINTARRAY *pa, double tolerance, int min_points);
+void lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance);
 
 /*
 * Closure test

Modified: trunk/liblwgeom/lwcollection.c
===================================================================
--- trunk/liblwgeom/lwcollection.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/lwcollection.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -456,24 +456,7 @@
 	return outcol;
 }
 
-LWGEOM*
-lwcollection_remove_repeated_points(const LWCOLLECTION *coll, double tolerance)
-{
-	uint32_t i;
-	LWGEOM **newgeoms;
 
-	newgeoms = lwalloc(sizeof(LWGEOM *)*coll->ngeoms);
-	for (i=0; i<coll->ngeoms; i++)
-	{
-		newgeoms[i] = lwgeom_remove_repeated_points(coll->geoms[i], tolerance);
-	}
-
-	return (LWGEOM*)lwcollection_construct(coll->type,
-	                                       coll->srid, coll->bbox ? gbox_copy(coll->bbox) : NULL,
-	                                       coll->ngeoms, newgeoms);
-}
-
-
 LWCOLLECTION*
 lwcollection_force_dims(const LWCOLLECTION *col, int hasz, int hasm)
 {

Modified: trunk/liblwgeom/lwgeom.c
===================================================================
--- trunk/liblwgeom/lwgeom.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/lwgeom.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -1486,53 +1486,9 @@
 
 extern LWGEOM* lwgeom_remove_repeated_points(const LWGEOM *in, double tolerance)
 {
-	LWDEBUGF(4, "lwgeom_remove_repeated_points got type %s",
-	         lwtype_name(in->type));
-
-	if(lwgeom_is_empty(in))
-	{
-		return lwgeom_clone_deep(in);
-	}
-
-	switch (in->type)
-	{
-	case MULTIPOINTTYPE:
-		return lwmpoint_remove_repeated_points((LWMPOINT*)in, tolerance);
-		break;
-	case LINETYPE:
-		return lwline_remove_repeated_points((LWLINE*)in, tolerance);
-
-	case MULTILINETYPE:
-	case COLLECTIONTYPE:
-	case MULTIPOLYGONTYPE:
-	case POLYHEDRALSURFACETYPE:
-		return lwcollection_remove_repeated_points((LWCOLLECTION *)in, tolerance);
-
-	case POLYGONTYPE:
-		return lwpoly_remove_repeated_points((LWPOLY *)in, tolerance);
-		break;
-
-	case POINTTYPE:
-	case TRIANGLETYPE:
-	case TINTYPE:
-		/* No point is repeated for a single point, or for Triangle or TIN */
-		return lwgeom_clone_deep(in);
-
-	case CIRCSTRINGTYPE:
-	case COMPOUNDTYPE:
-	case MULTICURVETYPE:
-	case CURVEPOLYTYPE:
-	case MULTISURFACETYPE:
-		/* Dunno how to handle these, will return untouched */
-		return lwgeom_clone_deep(in);
-
-	default:
-		lwnotice("%s: unsupported geometry type: %s",
-		         __func__, lwtype_name(in->type));
-		return lwgeom_clone_deep(in);
-		break;
-	}
-	return 0;
+	LWGEOM *out = lwgeom_clone_deep(in);
+	lwgeom_remove_repeated_points_in_place(out, tolerance);
+	return out;
 }
 
 void lwgeom_swap_ordinates(LWGEOM *in, LWORD o1, LWORD o2)
@@ -1624,10 +1580,152 @@
 	}
 }
 
+
 /**************************************************************/
 
 
 void
+lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
+{
+	switch (geom->type)
+	{
+		/* No-op! Cannot remote points */
+		case POINTTYPE:
+			return;
+		case LINETYPE:
+		{
+			LWLINE *g = (LWLINE*)(geom);
+			POINTARRAY *pa = g->points;
+			ptarray_remove_repeated_points_in_place(pa, tolerance, 2);
+			/* Invalid output */
+			if (pa->npoints == 1 && pa->maxpoints > 1)
+			{
+				/* Use first point as last point */
+				pa->npoints = 2;
+				ptarray_copy_point(pa, 0, 1);
+			}
+			break;
+		}
+		case POLYGONTYPE:
+		{
+			int i, j = 0;
+			LWPOLY *g = (LWPOLY*)(geom);
+			for (i = 0; i < g->nrings; i++)
+			{
+				POINTARRAY *pa = g->rings[i];
+				int minpoints = 4;
+				/* Skip zero'ed out rings */
+				if(!pa)
+					continue;
+				ptarray_remove_repeated_points_in_place(pa, tolerance, minpoints);
+				/* Drop collapsed rings */
+				if(pa->npoints < 4)
+				{
+					ptarray_free(pa);
+					continue;
+				}
+				g->rings[j++] = pa;
+			}
+			/* Update ring count */
+			g->nrings = j;
+			break;
+		}
+		case MULTIPOINTTYPE:
+		{
+			static int out_stack_size = 32;
+			double tolsq = tolerance*tolerance;
+			int i, j, n = 0;
+			LWMPOINT *mpt = (LWMPOINT *)(geom);
+			LWPOINT **out;
+			LWPOINT *out_stack[out_stack_size];
+			int use_heap = (mpt->ngeoms > out_stack_size);
+
+			/* No-op on empty */
+			if (mpt->ngeoms == 0) return;
+
+			/* We cannot write directly back to the multipoint */
+			/* geoms array because we're reading out of it still */
+			/* so we use a side array */
+			if (use_heap)
+				out = lwalloc(sizeof(LWMPOINT *) * mpt->ngeoms);
+			else
+				out = out_stack;
+
+			/* Inefficient O(n^2) implementation */
+			for (i = 0; i < mpt->ngeoms; i++)
+			{
+				int seen = 0;
+				LWPOINT *p1 = mpt->geoms[i];
+				const POINT2D *pt1 = getPoint2d_cp(p1->point, 0);
+				for (j = 0; j < n; j++)
+				{
+					LWPOINT *p2 = out[j];
+					const POINT2D *pt2 = getPoint2d_cp(p2->point, 0);
+					if (distance2d_sqr_pt_pt(pt1, pt2) <= tolsq)
+					{
+						seen = 1;
+						break;
+					}
+				}
+				if (seen) continue;
+				out[n++] = p1;
+			}
+
+			/* Copy remaining points back into the input */
+			/* array */
+			memcpy(mpt->geoms, out, sizeof(LWPOINT *) * n);
+			mpt->ngeoms = n;
+			if (use_heap) lwfree(out);
+			return;
+		}
+
+		case CIRCSTRINGTYPE:
+			/* Dunno how to handle these, will return untouched */
+			return;
+
+		/* Can process most multi* types as generic collection */
+		case MULTILINETYPE:
+		case MULTIPOLYGONTYPE:
+		case COLLECTIONTYPE:
+		/* Curve types we mostly ignore, but allow the linear */
+		/* portions to be processed by recursing into them */
+		case MULTICURVETYPE:
+		case CURVEPOLYTYPE:
+		case MULTISURFACETYPE:
+		case COMPOUNDTYPE:
+		{
+			int i, j = 0;
+			LWCOLLECTION *col = (LWCOLLECTION*)(geom);
+			for (i = 0; i < col->ngeoms; i++)
+			{
+				LWGEOM *g = col->geoms[i];
+				if (!g) continue;
+				lwgeom_remove_repeated_points_in_place(g, tolerance);
+				/* Drop zero'ed out geometries */
+				if(lwgeom_is_empty(g))
+				{
+					lwgeom_free(g);
+					continue;
+				}
+				col->geoms[j++] = g;
+			}
+			/* Update geometry count */
+			col->ngeoms = j;
+			break;
+		}
+		default:
+		{
+			lwerror("%s: unsupported geometry type: %s", __func__, lwtype_name(geom->type));
+			break;
+		}
+	}
+	return;
+}
+
+
+/**************************************************************/
+
+void
 lwgeom_simplify_in_place(LWGEOM *geom, double epsilon, int preserve_collapsed)
 {
 	switch (geom->type)
@@ -1722,9 +1820,6 @@
 	return;
 }
 
-
-/**************************************************************/
-
 LWGEOM* lwgeom_simplify(const LWGEOM *igeom, double dist, int preserve_collapsed)
 {
 	LWGEOM *lwgeom_out = lwgeom_clone_deep(igeom);
@@ -1733,7 +1828,9 @@
 	return lwgeom_out;
 }
 
+/**************************************************************/
 
+
 double lwgeom_area(const LWGEOM *geom)
 {
 	int type = geom->type;

Modified: trunk/liblwgeom/lwline.c
===================================================================
--- trunk/liblwgeom/lwline.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/lwline.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -449,13 +449,7 @@
 LWGEOM*
 lwline_remove_repeated_points(const LWLINE *lwline, double tolerance)
 {
-	POINTARRAY* npts = ptarray_remove_repeated_points_minpoints(lwline->points, tolerance, 2);
-
-	LWDEBUGF(3, "%s: npts %p", __func__, npts);
-
-	return (LWGEOM*)lwline_construct(lwline->srid,
-	                                 lwline->bbox ? gbox_copy(lwline->bbox) : 0,
-	                                 npts);
+	return lwgeom_remove_repeated_points((LWGEOM*)lwline, tolerance);
 }
 
 int

Modified: trunk/liblwgeom/lwmpoint.c
===================================================================
--- trunk/liblwgeom/lwmpoint.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/lwmpoint.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -88,41 +88,7 @@
 	lwfree(mpt);
 }
 
-LWGEOM*
-lwmpoint_remove_repeated_points(const LWMPOINT *mpoint, double tolerance)
-{
-	uint32_t nnewgeoms;
-	uint32_t i, j;
-	LWGEOM **newgeoms;
-	LWGEOM *lwpt1, *lwpt2;
 
-	newgeoms = lwalloc(sizeof(LWGEOM *)*mpoint->ngeoms);
-	nnewgeoms = 0;
-	for (i=0; i<mpoint->ngeoms; ++i)
-	{
-		lwpt1 = (LWGEOM*)mpoint->geoms[i];
-		/* Brute force, may be optimized by building an index */
-		int seen=0;
-		for (j=0; j<nnewgeoms; ++j)
-		{
-			lwpt2 = (LWGEOM*)newgeoms[j];
-			if ( lwgeom_mindistance2d(lwpt1, lwpt2) <= tolerance )
-			{
-				seen=1;
-				break;
-			}
-		}
-		if ( seen ) continue;
-		newgeoms[nnewgeoms++] = lwgeom_clone_deep(lwpt1);
-	}
-
-	return (LWGEOM*)lwcollection_construct(mpoint->type,
-	                                       mpoint->srid,
-										   mpoint->bbox ? gbox_copy(mpoint->bbox) : NULL,
-	                                       nnewgeoms, newgeoms);
-
-}
-
 LWMPOINT*
 lwmpoint_from_lwgeom(const LWGEOM *g)
 {

Modified: trunk/liblwgeom/lwpoly.c
===================================================================
--- trunk/liblwgeom/lwpoly.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/lwpoly.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -387,25 +387,6 @@
 	return ret;
 }
 
-LWGEOM*
-lwpoly_remove_repeated_points(const LWPOLY *poly, double tolerance)
-{
-	uint32_t i;
-	POINTARRAY **newrings;
-
-	newrings = lwalloc(sizeof(POINTARRAY *)*poly->nrings);
-	for (i=0; i<poly->nrings; i++)
-	{
-		newrings[i] = ptarray_remove_repeated_points_minpoints(poly->rings[i], tolerance, 4);
-	}
-
-	return (LWGEOM*)lwpoly_construct(poly->srid,
-	                                 poly->bbox ? gbox_copy(poly->bbox) : NULL,
-	                                 poly->nrings, newrings);
-
-}
-
-
 LWPOLY*
 lwpoly_force_dims(const LWPOLY *poly, int hasz, int hasm)
 {

Modified: trunk/liblwgeom/ptarray.c
===================================================================
--- trunk/liblwgeom/ptarray.c	2017-10-10 13:26:02 UTC (rev 15942)
+++ trunk/liblwgeom/ptarray.c	2017-10-10 16:27:08 UTC (rev 15943)
@@ -1429,93 +1429,86 @@
  *
  * Always returns a newly allocated object.
  */
-POINTARRAY *
+static POINTARRAY *
 ptarray_remove_repeated_points_minpoints(const POINTARRAY *in, double tolerance, int minpoints)
 {
-	POINTARRAY *out;
-	size_t ptsize = ptarray_point_size(in);
-	int has_z = FLAGS_GET_Z(in->flags);
-	int has_m = FLAGS_GET_M(in->flags);
-	int ipn = 1, opn = 1;
-	const POINT2D *prev_point, *this_point;
-	uint8_t *p1, *p2;
-	double tolsq = tolerance * tolerance;
+	POINTARRAY *out = ptarray_clone_deep(in);
+	ptarray_remove_repeated_points_in_place(out, tolerance, minpoints);
+	return out;
+}
 
-	LWDEBUGF(3, "%s called", __func__);
+POINTARRAY *
+ptarray_remove_repeated_points(const POINTARRAY *in, double tolerance)
+{
+	return ptarray_remove_repeated_points_minpoints(in, tolerance, 2);
+}
 
-	/* Single or zero point arrays can't have duplicates */
-	if ( in->npoints < 3 ) return ptarray_clone_deep(in);
 
-	/* Condition minpoints */
-	if ( minpoints < 1 ) minpoints = 1;
+void
+ptarray_remove_repeated_points_in_place(POINTARRAY *pa, double tolerance, int min_points)
+{
+	int i;
+	double tolsq = tolerance * tolerance;
+	const POINT2D *last = NULL;
+	const POINT2D *pt;
+	int n_points = pa->npoints;
+	int n_points_out = 0;
+	int pt_size = ptarray_point_size(pa);
+	double dsq;
 
-	/* Allocate enough output space for all points */
-	out = ptarray_construct(has_z, has_m, in->npoints);
+	/* No-op on short inputs */
+	if ( n_points <= 2 ) return;
 
-	/* Keep the first point */
-	p1 = getPoint_internal(out, 0);
-	p2 = getPoint_internal(in, 0);
-	memcpy(p1, p2, ptsize);
-
-	/* Now fill up the actual points */
-	prev_point = getPoint2d_cp(in, 0);
-	LWDEBUGF(3, " first point copied, out points: %d", opn);
-	for ( ipn = 1; ipn < in->npoints; ipn++ )
+	for (i = 0; i < n_points; i++)
 	{
-		this_point = getPoint2d_cp(in, ipn);
+		int last_point = (i == n_points-1);
 
-		/*
-		* If number of points left > number of points we need
-		* then it's still OK to drop dupes
-		*/
-		if ( in->npoints - ipn > minpoints - opn )
+		/* Look straight into the abyss */
+		pt = getPoint2d_cp(pa, i);
+
+		/* Preserve first point always */
+		if (last)
 		{
-			if (tolerance > 0.0)
+			/* Don't drop points if we are running short of points */
+	        if (n_points - i > min_points - n_points_out)
 			{
-				/* within the removal tolerance? */
-				double dsq = distance2d_sqr_pt_pt(prev_point, this_point);
-				if (dsq <= tolsq)
-					/* then skip it */
-					continue;
+				if (tolerance > 0.0)
+				{
+					/* Only drop points that are within our tolerance */
+					dsq = distance2d_sqr_pt_pt(last, pt);
+					/* Allow any point but the last one to be dropped */
+					if (!last_point && dsq <= tolsq)
+					{
+						continue;
+					}
+				}
+				else
+				{
+					/* At tolerance zero, only skip exact dupes */
+					if (memcmp((char*)pt, (char*)last, pt_size) == 0)
+						continue;
+				}
 			}
-			else
-			{
-				/* exact duplicate? */
-				p1 = getPoint_internal(in, ipn-1);
-				p2 = getPoint_internal(in, ipn);
-				if (memcmp(p1, p2, ptsize) == 0)
-					/* then skip it */
-					continue;
-			}
 		}
-		/*
-		* The point is different (see above) from
-		* the previous point, so add it to output
-		*/
-		p1 = getPoint_internal(out, opn++);
-		p2 = getPoint_internal(in, ipn);
-		memcpy(p1, p2, ptsize);
 
-		prev_point = this_point;
-	}
+		/* Got to last point, and it's not very different from */
+		/* the point that preceded it. We want to keep the last */
+		/* point, not the second-to-last one, so we pull our write */
+		/* index back one value */
+		if (last_point && n_points_out > 1 && tolerance > 0.0 && dsq <= tolsq)
+		{
+			n_points_out--;
+		}
 
-	/* Keep the last point */
-	p1 = getPoint_internal(out, opn-1); /* Last output point */
-	p2 = getPoint_internal(in, ipn-1); /* Last input point */
-	if ( memcmp(p1, p2, ptsize) != 0 )
-		memcpy(p1, p2, ptsize);
-
-	LWDEBUGF(3, " in:%d out:%d", out->npoints, opn);
-	out->npoints = opn;
-
-	return out;
+		/* Compact all remaining values to front of array */
+		ptarray_copy_point(pa, i, n_points_out++);
+		last = pt;
+	}
+	/* Adjust array length */
+	pa->npoints = n_points_out;
+	return;
 }
 
-POINTARRAY *
-ptarray_remove_repeated_points(const POINTARRAY *in, double tolerance)
-{
-	return ptarray_remove_repeated_points_minpoints(in, tolerance, 2);
-}
 
 /************************************************************************/
 



More information about the postgis-tickets mailing list