[postgis-tickets] [SCM] PostGIS branch master updated. 3.1.0rc1-202-g423b519

git at osgeo.org git at osgeo.org
Sun Jun 6 03:32:47 PDT 2021


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".

The branch, master has been updated
       via  423b519e1851eb630c57ffa242bf370edfb365ad (commit)
       via  35558344063371f4ccc7dbc591e18ff78407b209 (commit)
       via  7f41d76cb44397fe4b0a7bde4016ee14fff809ed (commit)
       via  76f9c693b37139f3dd04f6ab5ecbee6ec9e16c70 (commit)
      from  429bf56aa47e55a13555a15d20ef08a1c14d2aba (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 423b519e1851eb630c57ffa242bf370edfb365ad
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sun Jun 6 13:29:47 2021 +0300

    Polish over lwgeom_remove_repeated_points_in_place and NEWS entry

diff --git a/NEWS b/NEWS
index d45d57f..1de0ef3 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,8 @@ PostGIS 3.2.0
   - #4881, #4884, Store sign of edge_id for lineal TopoGeometry in relation table
            to retain direction (Sandro Santilli)
   - #4628, Add an option to disable ANALYZE when loading shapefiles (Stefan Corneliu Petrea)
+  - #4924, Faster ST_RemoveRepeatedPoints on large multipoints, O(NlogN) instead of O(N^2)
+           (Aliaksandr Kalenik, Darafei Praliaskouski)
 
  * New features*
   - #4841, FindTopology to quickly get a topology record (Sandro Santilli)
diff --git a/liblwgeom/cunit/cu_algorithm.c b/liblwgeom/cunit/cu_algorithm.c
index 6ce5fbc..9fa7549 100644
--- a/liblwgeom/cunit/cu_algorithm.c
+++ b/liblwgeom/cunit/cu_algorithm.c
@@ -1170,7 +1170,7 @@ static void test_lwgeom_remove_repeated_points(void)
 	modified = lwgeom_remove_repeated_points_in_place(g, 1);
 	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
-	ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,5 5,0 10,10 10)");
+	ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,0 10,5 5,10 0,10 10)");
 	lwgeom_free(g);
 	lwfree(ewkt);
 
@@ -1187,7 +1187,7 @@ static void test_lwgeom_remove_repeated_points(void)
 	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
 	ASSERT_STRING_EQUAL(
-		ewkt, "MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60)");
+	    ewkt, "MULTIPOINT(0 0,0 10,5 5,5 8,8 5,8 8,10 0,10 10,50 50,50 60,55 55,55 58,58 55,58 58,60 50,60 60)");
 	lwgeom_free(g);
 	lwfree(ewkt);
 
diff --git a/liblwgeom/lwgeom.c b/liblwgeom/lwgeom.c
index f0975dd..ad85fa8 100644
--- a/liblwgeom/lwgeom.c
+++ b/liblwgeom/lwgeom.c
@@ -1579,145 +1579,137 @@ lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
 	int geometry_modified = LW_FALSE;
 	switch (geom->type)
 	{
-		/* No-op! Cannot remote points */
-		case POINTTYPE:
-		case TRIANGLETYPE:
-			return geometry_modified;
-		case LINETYPE:
+	/* No-op! Cannot remove points */
+	case POINTTYPE:
+	case TRIANGLETYPE:
+		return geometry_modified;
+	case LINETYPE: {
+		LWLINE *g = (LWLINE *)(geom);
+		POINTARRAY *pa = g->points;
+		uint32_t npoints = pa->npoints;
+		ptarray_remove_repeated_points_in_place(pa, tolerance, 2);
+		geometry_modified = npoints != pa->npoints;
+		/* Invalid input, discard the collapsed line */
+		if (pa->npoints < 2)
 		{
-			LWLINE *g = (LWLINE*)(geom);
-			POINTARRAY *pa = g->points;
+			pa->npoints = 0;
+			geometry_modified = LW_TRUE;
+		}
+		break;
+	}
+	case POLYGONTYPE: {
+		uint32_t j = 0;
+		LWPOLY *g = (LWPOLY *)(geom);
+		for (uint32_t i = 0; i < g->nrings; i++)
+		{
+			POINTARRAY *pa = g->rings[i];
 			uint32_t npoints = pa->npoints;
-			ptarray_remove_repeated_points_in_place(pa, tolerance, 2);
-			geometry_modified = npoints != pa->npoints;
-			/* Invalid output */
-			if (pa->npoints == 1 && pa->maxpoints > 1)
+			ptarray_remove_repeated_points_in_place(pa, tolerance, 4);
+			geometry_modified |= npoints != pa->npoints;
+			/* Drop collapsed rings */
+			if (pa->npoints < 4)
 			{
-				/* Use first point as last point */
-				pa->npoints = 2;
-				ptarray_copy_point(pa, 0, 1);
+				geometry_modified = LW_TRUE;
+				ptarray_free(pa);
+				continue;
 			}
-			break;
+			g->rings[j++] = pa;
 		}
-		case POLYGONTYPE:
+		/* Update ring count */
+		g->nrings = j;
+		break;
+	}
+	case MULTIPOINTTYPE: {
+		double tolsq = tolerance * tolerance;
+		LWMPOINT *mpt = (LWMPOINT *)geom;
+
+		for (uint8_t dim = 0; dim < 2; dim++)
 		{
-			uint32_t i, j = 0;
-			LWPOLY *g = (LWPOLY*)(geom);
-			for (i = 0; i < g->nrings; i++)
+			/* sort by y, then by x - this way the result is sorted by x */
+			qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), dim ? cmp_point_x : cmp_point_y);
+			for (uint32_t i = 0; i < mpt->ngeoms; i++)
 			{
-				POINTARRAY *pa = g->rings[i];
-				int minpoints = 4;
-				uint32_t npoints = 0;
-				/* Skip zero'ed out rings */
-				if(!pa)
+				if (!mpt->geoms[i])
 					continue;
-				npoints = pa->npoints;
-				ptarray_remove_repeated_points_in_place(pa, tolerance, minpoints);
-				geometry_modified |= npoints != pa->npoints;
-				/* Drop collapsed rings */
-				if(pa->npoints < 4)
-				{
-					geometry_modified = LW_TRUE;
-					ptarray_free(pa);
-					continue;
-				}
-				g->rings[j++] = pa;
-			}
-			/* Update ring count */
-			g->nrings = j;
-			break;
-		}
-		case MULTIPOINTTYPE:
-		{
-			double tolsq = tolerance * tolerance;
-			LWMPOINT *mpt = (LWMPOINT *)geom;
 
-			for (uint8_t dim = 0; dim < 2; dim++)
-			{
-				qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), dim ? cmp_point_y : cmp_point_x);
-				for (uint32_t i = 0; i < mpt->ngeoms; i++)
+				const POINT2D *pti = getPoint2d_cp(mpt->geoms[i]->point, 0);
+
+				/* check upcoming points if they're within tolerance of current one */
+				for (uint32_t j = i + 1; j < mpt->ngeoms; j++)
 				{
-					if (!mpt->geoms[i])
+					if (!mpt->geoms[j])
 						continue;
 
-					const POINT2D *pti = getPoint2d_cp(mpt->geoms[i]->point, 0);
-					for (uint32_t j = i + 1; j < mpt->ngeoms; j++)
-					{
-						if (!mpt->geoms[j])
-							continue;
+					const POINT2D *ptj = getPoint2d_cp(mpt->geoms[j]->point, 0);
 
-						const POINT2D *ptj = getPoint2d_cp(mpt->geoms[j]->point, 0);
-						if ((dim ? ptj->y - pti->y : ptj->x - pti->x) > tolerance)
-							break;
+					/* check that the point is in the strip of tolerance around the point */
+					if ((dim ? ptj->x - pti->x : ptj->y - pti->y) > tolerance)
+						break;
 
-						if (distance2d_sqr_pt_pt(pti, ptj) <= tolsq)
-						{
-							lwpoint_free(mpt->geoms[j]);
-							mpt->geoms[j] = NULL;
-						}
+					/* remove any upcoming point that is within tolerance circle */
+					if (distance2d_sqr_pt_pt(pti, ptj) <= tolsq)
+					{
+						lwpoint_free(mpt->geoms[j]);
+						mpt->geoms[j] = NULL;
+						geometry_modified = LW_TRUE;
 					}
 				}
-
-				uint32_t i = 0;
-				for (uint32_t j = 0; j < mpt->ngeoms; j++)
-				{
-					if (mpt->geoms[j])
-						mpt->geoms[i++] = mpt->geoms[j];
-				}
-
-				geometry_modified |= mpt->ngeoms != i;
-				mpt->ngeoms = i;
 			}
 
-			break;
+			/* compactify array of points */
+			uint32_t i = 0;
+			for (uint32_t j = 0; j < mpt->ngeoms; j++)
+				if (mpt->geoms[j])
+					mpt->geoms[i++] = mpt->geoms[j];
+			mpt->ngeoms = i;
 		}
 
-		case CIRCSTRINGTYPE:
-			/* Dunno how to handle these, will return untouched */
-			return geometry_modified;
+		break;
+	}
 
-		/* Can process most multi* types as generic collection */
-		case MULTILINETYPE:
-		case MULTIPOLYGONTYPE:
-		case TINTYPE:
-		case COLLECTIONTYPE:
-		/* Curve types we mostly ignore, but allow the linear */
-		/* portions to be processed by recursing into them */
-		case MULTICURVETYPE:
-		case CURVEPOLYTYPE:
-		case MULTISURFACETYPE:
-		case COMPOUNDTYPE:
+	case CIRCSTRINGTYPE:
+		/* Dunno how to handle these, will return untouched */
+		return geometry_modified;
+
+	/* Can process most multi* types as generic collection */
+	case MULTILINETYPE:
+	case MULTIPOLYGONTYPE:
+	case TINTYPE:
+	case COLLECTIONTYPE:
+	/* Curve types we mostly ignore, but allow the linear */
+	/* portions to be processed by recursing into them */
+	case MULTICURVETYPE:
+	case CURVEPOLYTYPE:
+	case MULTISURFACETYPE:
+	case COMPOUNDTYPE: {
+		uint32_t i, j = 0;
+		LWCOLLECTION *col = (LWCOLLECTION *)(geom);
+		for (i = 0; i < col->ngeoms; i++)
 		{
-			uint32_t i, j = 0;
-			LWCOLLECTION *col = (LWCOLLECTION*)(geom);
-			for (i = 0; i < col->ngeoms; i++)
+			LWGEOM *g = col->geoms[i];
+			if (!g)
+				continue;
+			geometry_modified |= lwgeom_remove_repeated_points_in_place(g, tolerance);
+			/* Drop zero'ed out geometries */
+			if (lwgeom_is_empty(g))
 			{
-				LWGEOM *g = col->geoms[i];
-				if (!g) continue;
-				geometry_modified |= lwgeom_remove_repeated_points_in_place(g, tolerance);
-				/* Drop zero'ed out geometries */
-				if(lwgeom_is_empty(g))
-				{
-					lwgeom_free(g);
-					continue;
-				}
-				col->geoms[j++] = g;
+				lwgeom_free(g);
+				continue;
 			}
-			/* Update geometry count */
-			col->ngeoms = j;
-			break;
-		}
-		default:
-		{
-			lwerror("%s: unsupported geometry type: %s", __func__, lwtype_name(geom->type));
-			break;
+			col->geoms[j++] = g;
 		}
+		/* Update geometry count */
+		col->ngeoms = j;
+		break;
+	}
+	default: {
+		lwerror("%s: unsupported geometry type: %s", __func__, lwtype_name(geom->type));
+		break;
+	}
 	}
 
 	if (geometry_modified)
-	{
 		lwgeom_drop_bbox(geom);
-	}
 	return geometry_modified;
 }
 
diff --git a/regress/core/remove_repeated_points_expected b/regress/core/remove_repeated_points_expected
index 2e747d0..607dca0 100644
--- a/regress/core/remove_repeated_points_expected
+++ b/regress/core/remove_repeated_points_expected
@@ -4,8 +4,8 @@
 3|MULTILINESTRING((0 0,1 1,2 2,3 3),(5 5,4 4,2 2))
 4|POLYGON((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5))
 5|MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55)))
-6|MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60)
-7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
+6|MULTIPOINT(0 0,0 10,5 5,5 8,8 5,8 8,10 0,10 10,50 50,50 60,55 55,55 58,58 55,58 58,60 50,60 60)
+7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,0 10,5 5,5 8,8 5,8 8,10 0,10 10,50 50,50 60,55 55,55 58,58 55,58 58,60 50,60 60),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
 8|POINT(0 0)
 9|CURVEPOLYGON ZM (CIRCULARSTRING ZM (-2 0 0 0,-1 -1 1 2,0 0 2 4,1 -1 3 6,2 0 4 8,0 2 2 4,-2 0 0 0),(-1 0 1 2,0 0.5 2 4,1 0 3 6,0 1 3 4,-1 0 1 2))
 10|LINESTRING(0 0,0 0)

commit 35558344063371f4ccc7dbc591e18ff78407b209
Author: kalenikaliaksandr <kalenik.aliaksandr at gmail.com>
Date:   Thu Apr 22 20:39:03 2021 +0300

    fix code style

diff --git a/liblwgeom/lwgeom.c b/liblwgeom/lwgeom.c
index b041ab7..f0975dd 100644
--- a/liblwgeom/lwgeom.c
+++ b/liblwgeom/lwgeom.c
@@ -1632,22 +1632,22 @@ lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
 			double tolsq = tolerance * tolerance;
 			LWMPOINT *mpt = (LWMPOINT *)geom;
 
-			for (uint32_t dim = 0; dim < 2; dim++)
+			for (uint8_t dim = 0; dim < 2; dim++)
 			{
-				qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), dim == 0 ? cmp_point_x : cmp_point_y);
+				qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), dim ? cmp_point_y : cmp_point_x);
 				for (uint32_t i = 0; i < mpt->ngeoms; i++)
 				{
-					if (mpt->geoms[i] == NULL)
+					if (!mpt->geoms[i])
 						continue;
 
 					const POINT2D *pti = getPoint2d_cp(mpt->geoms[i]->point, 0);
 					for (uint32_t j = i + 1; j < mpt->ngeoms; j++)
 					{
-						if (mpt->geoms[j] == NULL)
+						if (!mpt->geoms[j])
 							continue;
 
 						const POINT2D *ptj = getPoint2d_cp(mpt->geoms[j]->point, 0);
-						if ((dim == 0 ? ptj->x - pti->x : ptj->y - pti->y) > tolerance)
+						if ((dim ? ptj->y - pti->y : ptj->x - pti->x) > tolerance)
 							break;
 
 						if (distance2d_sqr_pt_pt(pti, ptj) <= tolsq)
@@ -1661,7 +1661,7 @@ lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
 				uint32_t i = 0;
 				for (uint32_t j = 0; j < mpt->ngeoms; j++)
 				{
-					if (mpt->geoms[j] != NULL)
+					if (mpt->geoms[j])
 						mpt->geoms[i++] = mpt->geoms[j];
 				}
 

commit 7f41d76cb44397fe4b0a7bde4016ee14fff809ed
Author: kalenikaliaksandr <kalenik.aliaksandr at gmail.com>
Date:   Mon Apr 19 23:30:42 2021 +0300

    improve performance of removerepeatedpoints(multipoint) -- double sort

diff --git a/liblwgeom/cunit/cu_algorithm.c b/liblwgeom/cunit/cu_algorithm.c
index 4400ca2..6ce5fbc 100644
--- a/liblwgeom/cunit/cu_algorithm.c
+++ b/liblwgeom/cunit/cu_algorithm.c
@@ -1170,7 +1170,7 @@ static void test_lwgeom_remove_repeated_points(void)
 	modified = lwgeom_remove_repeated_points_in_place(g, 1);
 	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
-	ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,0 10,5 5,10 10,10 0)");
+	ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,5 5,0 10,10 10)");
 	lwgeom_free(g);
 	lwfree(ewkt);
 
@@ -1187,7 +1187,7 @@ static void test_lwgeom_remove_repeated_points(void)
 	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
 	ASSERT_STRING_EQUAL(
-	    ewkt, "MULTIPOINT(0 0,0 10,5 5,8 5,8 8,10 10,5 8,50 50,55 55,50 60,55 58,58 58,60 60,58 55,60 50,10 0)");
+		ewkt, "MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60)");
 	lwgeom_free(g);
 	lwfree(ewkt);
 
diff --git a/liblwgeom/lwgeom.c b/liblwgeom/lwgeom.c
index 66dc1be..b041ab7 100644
--- a/liblwgeom/lwgeom.c
+++ b/liblwgeom/lwgeom.c
@@ -1550,19 +1550,27 @@ void lwgeom_set_srid(LWGEOM *geom, int32_t srid)
 /**************************************************************/
 
 static int
-point_cmp(const void *pn1, const void *pn2)
+cmp_point_x(const void *pa, const void *pb)
 {
-	LWPOINT *p1 = *((LWPOINT **)pn1);
-	LWPOINT *p2 = *((LWPOINT **)pn2);
+	LWPOINT *p1 = *((LWPOINT **)pa);
+	LWPOINT *p2 = *((LWPOINT **)pb);
 
-	GBOX b1, b2;
-	lwgeom_calculate_gbox((LWGEOM *)p1, &b1);
-	lwgeom_calculate_gbox((LWGEOM *)p2, &b2);
+	const POINT2D *pt1 = getPoint2d_cp(p1->point, 0);
+	const POINT2D *pt2 = getPoint2d_cp(p2->point, 0);
 
-	uint64_t h1, h2;
-	h1 = gbox_get_sortable_hash(&b1, 0);
-	h2 = gbox_get_sortable_hash(&b2, 0);
-	return h1 < h2 ? -1 : (h1 > h2 ? 1 : 0);
+	return (pt1->x > pt2->x) ? 1 : ((pt1->x < pt2->x) ? -1 : 0);
+}
+
+static int
+cmp_point_y(const void *pa, const void *pb)
+{
+	LWPOINT *p1 = *((LWPOINT **)pa);
+	LWPOINT *p2 = *((LWPOINT **)pb);
+
+	const POINT2D *pt1 = getPoint2d_cp(p1->point, 0);
+	const POINT2D *pt2 = getPoint2d_cp(p2->point, 0);
+
+	return (pt1->y > pt2->y) ? 1 : ((pt1->y < pt2->y) ? -1 : 0);
 }
 
 int
@@ -1623,26 +1631,43 @@ lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
 		{
 			double tolsq = tolerance * tolerance;
 			LWMPOINT *mpt = (LWMPOINT *)geom;
-			qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), point_cmp);
 
-			uint32_t i = 0;
-			for (uint32_t j = 1; j < mpt->ngeoms; j++)
+			for (uint32_t dim = 0; dim < 2; dim++)
 			{
-				POINT2D ptj, pti;
-				getPoint2d_p(mpt->geoms[i]->point, 0, &pti);
-				getPoint2d_p(mpt->geoms[j]->point, 0, &ptj);
-				if (distance2d_sqr_pt_pt(&ptj, &pti) > tolsq)
+				qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), dim == 0 ? cmp_point_x : cmp_point_y);
+				for (uint32_t i = 0; i < mpt->ngeoms; i++)
 				{
-					mpt->geoms[++i] = mpt->geoms[j];
+					if (mpt->geoms[i] == NULL)
+						continue;
+
+					const POINT2D *pti = getPoint2d_cp(mpt->geoms[i]->point, 0);
+					for (uint32_t j = i + 1; j < mpt->ngeoms; j++)
+					{
+						if (mpt->geoms[j] == NULL)
+							continue;
+
+						const POINT2D *ptj = getPoint2d_cp(mpt->geoms[j]->point, 0);
+						if ((dim == 0 ? ptj->x - pti->x : ptj->y - pti->y) > tolerance)
+							break;
+
+						if (distance2d_sqr_pt_pt(pti, ptj) <= tolsq)
+						{
+							lwpoint_free(mpt->geoms[j]);
+							mpt->geoms[j] = NULL;
+						}
+					}
 				}
-				else
+
+				uint32_t i = 0;
+				for (uint32_t j = 0; j < mpt->ngeoms; j++)
 				{
-					lwpoint_free(mpt->geoms[j]);
+					if (mpt->geoms[j] != NULL)
+						mpt->geoms[i++] = mpt->geoms[j];
 				}
-			}
 
-			geometry_modified = mpt->ngeoms != i + 1;
-			mpt->ngeoms = i + 1;
+				geometry_modified |= mpt->ngeoms != i;
+				mpt->ngeoms = i;
+			}
 
 			break;
 		}
diff --git a/regress/core/mvt_expected b/regress/core/mvt_expected
index bcbcc1c..b53be37 100644
--- a/regress/core/mvt_expected
+++ b/regress/core/mvt_expected
@@ -81,7 +81,7 @@ TG6|GjIKBHRlc3QSHRICAAAYAyIVCVqmPxoTRjETCicPCSgKEh0KFBQPGgJjMSICKAEogCB4Ag==
 TG7|Gj0KBHRlc3QSKBICAAAYAyIgCSi6PyIyHh0eJwkAJw8JFAoSABQUCQ8JEzESKAoKFA8aAmMxIgIo
 ASiAIHgC
 TG8|GiEKBHRlc3QSDBICAAAYASIECTLePxoCYzEiAigBKIAgeAI=
-TG9|GiMKBHRlc3QSDhICAAAYASIGETLeP2VGGgJjMSICKAEogCB4Ag==
+TG9|GiMKBHRlc3QSDhICAAAYASIGETOkQGZFGgJjMSICKAEogCB4Ag==
 TA1|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
 TA2|GigKBHRlc3QSDBICAAAYASIECTLePxoCYzEiCRmamZmZmZnxPyiAIHgC
 TA3|GhkKBHRlc3QSCBgBIgQJMt4/GgJjMSiAIHgC
diff --git a/regress/core/remove_repeated_points_expected b/regress/core/remove_repeated_points_expected
index faa1a9c..2e747d0 100644
--- a/regress/core/remove_repeated_points_expected
+++ b/regress/core/remove_repeated_points_expected
@@ -4,8 +4,8 @@
 3|MULTILINESTRING((0 0,1 1,2 2,3 3),(5 5,4 4,2 2))
 4|POLYGON((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5))
 5|MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55)))
-6|MULTIPOINT(0 0,0 10,5 5,8 5,8 8,10 10,5 8,50 50,55 55,50 60,55 58,58 58,60 60,58 55,60 50,10 0)
-7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,0 10,5 5,8 5,8 8,10 10,5 8,50 50,55 55,50 60,55 58,58 58,60 60,58 55,60 50,10 0),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
+6|MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60)
+7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,10 0,5 5,8 5,5 8,8 8,0 10,10 10,50 50,60 50,55 55,58 55,55 58,58 58,50 60,60 60),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
 8|POINT(0 0)
 9|CURVEPOLYGON ZM (CIRCULARSTRING ZM (-2 0 0 0,-1 -1 1 2,0 0 2 4,1 -1 3 6,2 0 4 8,0 2 2 4,-2 0 0 0),(-1 0 1 2,0 0.5 2 4,1 0 3 6,0 1 3 4,-1 0 1 2))
 10|LINESTRING(0 0,0 0)

commit 76f9c693b37139f3dd04f6ab5ecbee6ec9e16c70
Author: kalenikaliaksandr <kalenik.aliaksandr at gmail.com>
Date:   Mon Apr 19 04:55:10 2021 +0300

    improve performance of removerepeatedpoints(multipoint)

diff --git a/liblwgeom/cunit/cu_algorithm.c b/liblwgeom/cunit/cu_algorithm.c
index f58b001..4400ca2 100644
--- a/liblwgeom/cunit/cu_algorithm.c
+++ b/liblwgeom/cunit/cu_algorithm.c
@@ -1170,7 +1170,7 @@ static void test_lwgeom_remove_repeated_points(void)
 	modified = lwgeom_remove_repeated_points_in_place(g, 1);
 	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
-	ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5)");
+	ASSERT_STRING_EQUAL(ewkt, "MULTIPOINT(0 0,0 10,5 5,10 10,10 0)");
 	lwgeom_free(g);
 	lwfree(ewkt);
 
@@ -1187,7 +1187,7 @@ static void test_lwgeom_remove_repeated_points(void)
 	ASSERT_INT_EQUAL(modified, LW_TRUE);
 	ewkt = lwgeom_to_ewkt(g);
 	ASSERT_STRING_EQUAL(
-	    ewkt, "MULTIPOINT(0 0,10 0,10 10,0 10,5 5,5 8,8 8,8 5,50 50,50 60,60 60,60 50,55 55,55 58,58 58,58 55)");
+	    ewkt, "MULTIPOINT(0 0,0 10,5 5,8 5,8 8,10 10,5 8,50 50,55 55,50 60,55 58,58 58,60 60,58 55,60 50,10 0)");
 	lwgeom_free(g);
 	lwfree(ewkt);
 
diff --git a/liblwgeom/lwgeom.c b/liblwgeom/lwgeom.c
index 49579b4..66dc1be 100644
--- a/liblwgeom/lwgeom.c
+++ b/liblwgeom/lwgeom.c
@@ -1549,6 +1549,21 @@ void lwgeom_set_srid(LWGEOM *geom, int32_t srid)
 
 /**************************************************************/
 
+static int
+point_cmp(const void *pn1, const void *pn2)
+{
+	LWPOINT *p1 = *((LWPOINT **)pn1);
+	LWPOINT *p2 = *((LWPOINT **)pn2);
+
+	GBOX b1, b2;
+	lwgeom_calculate_gbox((LWGEOM *)p1, &b1);
+	lwgeom_calculate_gbox((LWGEOM *)p2, &b2);
+
+	uint64_t h1, h2;
+	h1 = gbox_get_sortable_hash(&b1, 0);
+	h2 = gbox_get_sortable_hash(&b2, 0);
+	return h1 < h2 ? -1 : (h1 > h2 ? 1 : 0);
+}
 
 int
 lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
@@ -1606,55 +1621,29 @@ lwgeom_remove_repeated_points_in_place(LWGEOM *geom, double tolerance)
 		}
 		case MULTIPOINTTYPE:
 		{
-			double tolsq = tolerance*tolerance;
-			uint32_t i, j, n = 0;
-			LWMPOINT *mpt = (LWMPOINT *)(geom);
-			LWPOINT **out;
-			LWPOINT *out_stack[out_stack_size];
-			int use_heap = (mpt->ngeoms > out_stack_size);
-
-			/* No-op on empty */
-			if (mpt->ngeoms < 2)
-				return geometry_modified;
-
-			/* We cannot write directly back to the multipoint */
-			/* geoms array because we're reading out of it still */
-			/* so we use a side array */
-			if (use_heap)
-				out = lwalloc(sizeof(LWMPOINT *) * mpt->ngeoms);
-			else
-				out = out_stack;
+			double tolsq = tolerance * tolerance;
+			LWMPOINT *mpt = (LWMPOINT *)geom;
+			qsort(mpt->geoms, mpt->ngeoms, sizeof(LWPOINT *), point_cmp);
 
-			/* Inefficient O(n^2) implementation */
-			for (i = 0; i < mpt->ngeoms; i++)
+			uint32_t i = 0;
+			for (uint32_t j = 1; j < mpt->ngeoms; j++)
 			{
-				int seen = 0;
-				LWPOINT *p1 = mpt->geoms[i];
-				const POINT2D *pt1 = getPoint2d_cp(p1->point, 0);
-				for (j = 0; j < n; j++)
+				POINT2D ptj, pti;
+				getPoint2d_p(mpt->geoms[i]->point, 0, &pti);
+				getPoint2d_p(mpt->geoms[j]->point, 0, &ptj);
+				if (distance2d_sqr_pt_pt(&ptj, &pti) > tolsq)
 				{
-					LWPOINT *p2 = out[j];
-					const POINT2D *pt2 = getPoint2d_cp(p2->point, 0);
-					if (distance2d_sqr_pt_pt(pt1, pt2) <= tolsq)
-					{
-						seen = 1;
-						break;
-					}
+					mpt->geoms[++i] = mpt->geoms[j];
 				}
-				if (seen)
+				else
 				{
-					lwpoint_free(p1);
-					continue;
+					lwpoint_free(mpt->geoms[j]);
 				}
-				out[n++] = p1;
 			}
 
-			/* Copy remaining points back into the input */
-			/* array */
-			memcpy(mpt->geoms, out, sizeof(LWPOINT *) * n);
-			geometry_modified = mpt->ngeoms != n;
-			mpt->ngeoms = n;
-			if (use_heap) lwfree(out);
+			geometry_modified = mpt->ngeoms != i + 1;
+			mpt->ngeoms = i + 1;
+
 			break;
 		}
 
diff --git a/regress/core/remove_repeated_points_expected b/regress/core/remove_repeated_points_expected
index b793a6d..faa1a9c 100644
--- a/regress/core/remove_repeated_points_expected
+++ b/regress/core/remove_repeated_points_expected
@@ -4,8 +4,8 @@
 3|MULTILINESTRING((0 0,1 1,2 2,3 3),(5 5,4 4,2 2))
 4|POLYGON((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5))
 5|MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55)))
-6|MULTIPOINT(0 0,10 0,10 10,0 10,5 5,5 8,8 8,8 5,50 50,50 60,60 60,60 50,55 55,55 58,58 58,58 55)
-7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,10 0,10 10,0 10,5 5,5 8,8 8,8 5,50 50,50 60,60 60,60 50,55 55,55 58,58 58,58 55),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
+6|MULTIPOINT(0 0,0 10,5 5,8 5,8 8,10 10,5 8,50 50,55 55,50 60,55 58,58 58,60 60,58 55,60 50,10 0)
+7|GEOMETRYCOLLECTION(MULTIPOINT(0 0,0 10,5 5,8 5,8 8,10 10,5 8,50 50,55 55,50 60,55 58,58 58,60 60,58 55,60 50,10 0),MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(5 5,5 8,8 8,8 5,5 5)),((50 50,50 60,60 60,60 50,50 50),(55 55,55 58,58 58,58 55,55 55))))
 8|POINT(0 0)
 9|CURVEPOLYGON ZM (CIRCULARSTRING ZM (-2 0 0 0,-1 -1 1 2,0 0 2 4,1 -1 3 6,2 0 4 8,0 2 2 4,-2 0 0 0),(-1 0 1 2,0 0.5 2 4,1 0 3 6,0 1 3 4,-1 0 1 2))
 10|LINESTRING(0 0,0 0)

-----------------------------------------------------------------------

Summary of changes:
 NEWS                                         |   2 +
 liblwgeom/cunit/cu_algorithm.c               |   4 +-
 liblwgeom/lwgeom.c                           | 250 ++++++++++++++-------------
 regress/core/mvt_expected                    |   2 +-
 regress/core/remove_repeated_points_expected |   4 +-
 5 files changed, 135 insertions(+), 127 deletions(-)


hooks/post-receive
-- 
PostGIS


More information about the postgis-tickets mailing list