[postgis-tickets] [SCM] PostGIS branch master updated. 3.1.0alpha1-146-g1fab20d

git at osgeo.org git at osgeo.org
Sun Jul 5 14:54:49 PDT 2020


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".

The branch, master has been updated
       via  1fab20d016044103a66fa9b29ec5bf2f358fc580 (commit)
      from  6700df0e2db7965458b36f973d0216ce1cada762 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 1fab20d016044103a66fa9b29ec5bf2f358fc580
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sat Jun 27 23:41:24 2020 +0300

    Support for 3D in K-Means.
    
    Closes #4710.

diff --git a/NEWS b/NEWS
index 00f7550..6c3d614 100644
--- a/NEWS
+++ b/NEWS
@@ -9,7 +9,7 @@ Only tickets not included in 3.1.0alpha1
 * New features *
   - #4656, Cast a geojson_text::geometry for implicit GeoJSON ingestion (Raúl Marín)
   - #4687, Expose GEOS MaximumInscribedCircle (Paul Ramsey)
-  -
+  - #4710, ST_ClusterKMeans now works with 3D geometries (Darafei Praliaskouski)
 
 * Enhancements *
   - #4675, topology.GetRingEdges now implemented in C (Sandro Santilli)
diff --git a/doc/reference_cluster.xml b/doc/reference_cluster.xml
index a33fdd5..1ae50b3 100644
--- a/doc/reference_cluster.xml
+++ b/doc/reference_cluster.xml
@@ -233,8 +233,9 @@ GEOMETRYCOLLECTION(LINESTRING(6 6,7 7))
       <para>Returns 2D distance based
         <ulink url="https://en.wikipedia.org/wiki/K-means_clustering">K-means</ulink>
         cluster number for each input geometry. The distance used for clustering is the
-        distance between the centroids of the geometries.
+        distance between the centroids for 2D geometries, and distance between bounding box centers for 3D geometries.
       </para>
+      <para>Enhanced: 3.1.0 Support for 3D geometries</para>
       <para>Availability: 2.3.0</para>
     </refsection>
 
@@ -245,7 +246,7 @@ GEOMETRYCOLLECTION(LINESTRING(6 6,7 7))
 SELECT lpad((row_number() over())::text,3,'0') As parcel_id, geom,
 ('{residential, commercial}'::text[])[1 + mod(row_number()OVER(),2)] As type
 FROM
-    ST_Subdivide(ST_Buffer('LINESTRING(40 100, 98 100, 100 150, 60 90)'::geometry,
+    ST_Subdivide(ST_Buffer('SRID=3857;LINESTRING(40 100, 98 100, 100 150, 60 90)'::geometry,
     40, 'endcap=square'),12) As geom;
 </programlisting>
 
@@ -306,6 +307,27 @@ FROM parcels;
    2 | 006       | residential
 (7 rows)</programlisting>
 
+
+		    <programlisting> -- Clustering points around antimeridian can be done in 3D XYZ CRS, EPSG:4978:
+
+SELECT ST_ClusterKMeans(ST_Transform(ST_Force3D(geom), 4978), 3) over () AS cid, parcel_id, type
+FROM parcels;
+-- result
+┌─────┬───────────┬─────────────┐
+│ cid │ parcel_id │    type     │
+├─────┼───────────┼─────────────┤
+│   1 │ 001       │ commercial  │
+│   2 │ 002       │ residential │
+│   0 │ 003       │ commercial  │
+│   1 │ 004       │ residential │
+│   0 │ 005       │ commercial  │
+│   2 │ 006       │ residential │
+│   0 │ 007       │ commercial  │
+└─────┴───────────┴─────────────┘
+(7 rows)
+</programlisting>
+
+
     </refsection>
 
     <refsection>
diff --git a/liblwgeom/cunit/cu_algorithm.c b/liblwgeom/cunit/cu_algorithm.c
index cdd1f37..1578a2e 100644
--- a/liblwgeom/cunit/cu_algorithm.c
+++ b/liblwgeom/cunit/cu_algorithm.c
@@ -1726,7 +1726,7 @@ static void test_kmeans(void)
 		}
 	}
 
-	r = lwgeom_cluster_2d_kmeans((const LWGEOM **)geoms, N, num_clusters);
+	r = lwgeom_cluster_kmeans((const LWGEOM **)geoms, N, num_clusters);
 
 	// for (i = 0; i < k; i++)
 	// {
diff --git a/liblwgeom/liblwgeom.h.in b/liblwgeom/liblwgeom.h.in
index 7c573b5..59a94ce 100644
--- a/liblwgeom/liblwgeom.h.in
+++ b/liblwgeom/liblwgeom.h.in
@@ -2512,7 +2512,7 @@ LWGEOM* lwgeom_voronoi_diagram(const LWGEOM* g, const GBOX* env, double toleranc
 * @param ngeoms the number of elements in the array
 * @param k the number of clusters to calculate
 */
-int * lwgeom_cluster_2d_kmeans(const LWGEOM **geoms, uint32_t ngeoms, uint32_t k);
+int * lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t ngeoms, uint32_t k);
 
 #include "lwinline.h"
 
diff --git a/liblwgeom/lwinline.h b/liblwgeom/lwinline.h
index 880b51d..c3d4366 100644
--- a/liblwgeom/lwinline.h
+++ b/liblwgeom/lwinline.h
@@ -40,6 +40,16 @@ distance2d_sqr_pt_pt(const POINT2D *p1, const POINT2D *p2)
 	return hside * hside + vside * vside;
 }
 
+inline static double
+distance3d_sqr_pt_pt(const POINT3D *p1, const POINT3D *p2)
+{
+	double hside = p2->x - p1->x;
+	double vside = p2->y - p1->y;
+	double zside = p2->z - p1->z;
+
+	return hside * hside + vside * vside + zside * zside;
+}
+
 /*
  * Size of point represeneted in the POINTARRAY
  * 16 for 2d, 24 for 3d, 32 for 4d
@@ -94,7 +104,7 @@ getPoint2d_cp(const POINTARRAY *pa, uint32_t n)
 }
 
 /**
- * Returns a POINT2D pointer into the POINTARRAY serialized_ptlist,
+ * Returns a POINT3D pointer into the POINTARRAY serialized_ptlist,
  * suitable for reading from. This is very high performance
  * and declared const because you aren't allowed to muck with the
  * values, only read them.
@@ -106,7 +116,7 @@ getPoint3d_cp(const POINTARRAY *pa, uint32_t n)
 }
 
 /**
- * Returns a POINT2D pointer into the POINTARRAY serialized_ptlist,
+ * Returns a POINT4D pointer into the POINTARRAY serialized_ptlist,
  * suitable for reading from. This is very high performance
  * and declared const because you aren't allowed to muck with the
  * values, only read them.
diff --git a/liblwgeom/lwkmeans.c b/liblwgeom/lwkmeans.c
index 8a4931a..bdb244a 100644
--- a/liblwgeom/lwkmeans.c
+++ b/liblwgeom/lwkmeans.c
@@ -1,6 +1,6 @@
 /*-------------------------------------------------------------------------
  *
- * Copyright (c) 2018, Darafei Praliaskouski <me at komzpa.net>
+ * Copyright (c) 2018-2020, Darafei Praliaskouski <me at komzpa.net>
  * Copyright (c) 2016, Paul Ramsey <pramsey at cleverelephant.ca>
  *
  *------------------------------------------------------------------------*/
@@ -19,33 +19,23 @@
  */
 #define KMEANS_MAX_ITERATIONS 1000
 
-static void
-update_r(POINT2D** objs, int* clusters, uint32_t n, POINT2D** centers, uint32_t k)
+static uint8_t
+update_r(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
 {
-	POINT2D* obj;
-	unsigned int i;
-	double distance, curr_distance;
-	uint32_t cluster, curr_cluster;
+	uint8_t converged = LW_TRUE;
 
-	for (i = 0; i < n; i++)
+	for (uint32_t i = 0; i < n; i++)
 	{
-		obj = objs[i];
-
-		/* Don't try to cluster NULL objects, just add them to the "unclusterable cluster" */
-		if (!obj)
-		{
-			clusters[i] = KMEANS_NULL_CLUSTER;
-			continue;
-		}
+		POINT3D obj = objs[i];
 
 		/* Initialize with distance to first cluster */
-		curr_distance = distance2d_sqr_pt_pt(obj, centers[0]);
-		curr_cluster = 0;
+		double curr_distance = distance3d_sqr_pt_pt(&obj, &centers[0]);
+		int curr_cluster = 0;
 
 		/* Check all other cluster centers and find the nearest */
-		for (cluster = 1; cluster < k; cluster++)
+		for (uint32_t cluster = 1; cluster < k; cluster++)
 		{
-			distance = distance2d_sqr_pt_pt(obj, centers[cluster]);
+			double distance = distance3d_sqr_pt_pt(&obj, &centers[cluster]);
 			if (distance < curr_distance)
 			{
 				curr_distance = distance;
@@ -54,81 +44,63 @@ update_r(POINT2D** objs, int* clusters, uint32_t n, POINT2D** centers, uint32_t
 		}
 
 		/* Store the nearest cluster this object is in */
-		clusters[i] = (int) curr_cluster;
+		if (clusters[i] != (int)curr_cluster)
+		{
+			converged = LW_FALSE;
+			clusters[i] = (int)curr_cluster;
+		}
 	}
+	return converged;
 }
 
 static void
-update_means(POINT2D** objs, int* clusters, uint32_t n, POINT2D** centers, uint32_t* weights, uint32_t k)
+update_means(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t *weights, uint32_t k)
 {
-	uint32_t i;
-	int cluster;
-
 	memset(weights, 0, sizeof(uint32_t) * k);
-	for (i = 0; i < k; i++)
+	memset(centers, 0, sizeof(POINT3D) * k);
+	for (uint32_t i = 0; i < n; i++)
 	{
-		centers[i]->x = 0.0;
-		centers[i]->y = 0.0;
-	}
-	for (i = 0; i < n; i++)
-	{
-		cluster = clusters[i];
-		if (cluster != KMEANS_NULL_CLUSTER)
-		{
-			centers[cluster]->x += objs[i]->x;
-			centers[cluster]->y += objs[i]->y;
-			weights[cluster] += 1;
-		}
+		int cluster = clusters[i];
+		centers[cluster].x += objs[i].x;
+		centers[cluster].y += objs[i].y;
+		centers[cluster].z += objs[i].z;
+		weights[cluster] += 1;
 	}
-	for (i = 0; i < k; i++)
+	for (uint32_t i = 0; i < k; i++)
 	{
 		if (weights[i])
 		{
-			centers[i]->x /= weights[i];
-			centers[i]->y /= weights[i];
+			centers[i].x /= weights[i];
+			centers[i].y /= weights[i];
+			centers[i].z /= weights[i];
 		}
 	}
 }
 
-static int
-kmeans(POINT2D** objs, int* clusters, uint32_t n, POINT2D** centers, uint32_t k)
+static uint8_t
+kmeans(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
 {
-	uint32_t i = 0;
-	int* clusters_last;
-	int converged = LW_FALSE;
-	size_t clusters_sz = sizeof(int) * n;
-	uint32_t* weights;
+	uint8_t converged = LW_FALSE;
+	uint32_t *weights = lwalloc(sizeof(uint32_t) * k);
 
-	weights = lwalloc(sizeof(int) * k);
-
-	/* previous cluster state array */
-	clusters_last = lwalloc(clusters_sz);
-
-	for (i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++)
+	for (uint32_t i = 0; i < KMEANS_MAX_ITERATIONS; i++)
 	{
 		LW_ON_INTERRUPT(break);
-
-		/* store the previous state of the clustering */
-		memcpy(clusters_last, clusters, clusters_sz);
-
-		update_r(objs, clusters, n, centers, k);
+		converged = update_r(objs, clusters, n, centers, k);
+		if (converged)
+			break;
 		update_means(objs, clusters, n, centers, weights, k);
-
-		/* if all the cluster numbers are unchanged, we are at a stable solution */
-		converged = memcmp(clusters_last, clusters, clusters_sz) == 0;
 	}
-
-	lwfree(clusters_last);
 	lwfree(weights);
 	if (!converged)
-		lwerror("%s did not converge after %d iterations", __func__, i);
+		lwerror("%s did not converge after %d iterations", __func__, KMEANS_MAX_ITERATIONS);
 	return converged;
 }
 
 static void
-kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw, uint32_t k)
+kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
 {
-	double* distances;
+	double *distances;
 	uint32_t p1 = 0, p2 = 0;
 	uint32_t i, j;
 	uint32_t duplicate_count = 1; /* a point is a duplicate of itself */
@@ -141,20 +113,9 @@ kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw,
 	/* k >= 2: find two distant points greedily */
 	for (i = 1; i < n; i++)
 	{
-		/* skip null */
-		if (!objs[i]) continue;
-
-		/* reinit if first element happened to be null */
-		if (!objs[p1] && !objs[p2])
-		{
-			p1 = i;
-			p2 = i;
-			continue;
-		}
-
 		/* if we found a larger distance, replace our choice */
-		dst_p1 = distance2d_sqr_pt_pt(objs[i], objs[p1]);
-		dst_p2 = distance2d_sqr_pt_pt(objs[i], objs[p2]);
+		dst_p1 = distance3d_sqr_pt_pt(&objs[i], &objs[p1]);
+		dst_p2 = distance3d_sqr_pt_pt(&objs[i], &objs[p2]);
 		if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
 		{
 			if (dst_p1 > dst_p2)
@@ -168,7 +129,8 @@ kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw,
 				p1 = i;
 			}
 		}
-		if ((dst_p1 == 0) || (dst_p2 == 0)) duplicate_count++;
+		if ((dst_p1 == 0) || (dst_p2 == 0))
+			duplicate_count++;
 	}
 	if (duplicate_count > 1)
 		lwnotice(
@@ -177,13 +139,11 @@ kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw,
 		    duplicate_count);
 
 	/* by now two points should be found and non-same */
-	assert(p1 != p2 && objs[p1] && objs[p2] && max_dst >= 0);
+	assert(p1 != p2 && max_dst >= 0);
 
 	/* accept these two points */
-	centers_raw[0] = *((POINT2D *)objs[p1]);
-	centers[0] = &(centers_raw[0]);
-	centers_raw[1] = *((POINT2D *)objs[p2]);
-	centers[1] = &(centers_raw[1]);
+	centers[0] = objs[p1];
+	centers[1] = objs[p2];
 
 	if (k > 2)
 	{
@@ -192,12 +152,7 @@ kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw,
 
 		/* initialize array with distance to first object */
 		for (j = 0; j < n; j++)
-		{
-			if (objs[j])
-				distances[j] = distance2d_sqr_pt_pt(objs[j], centers[0]);
-			else
-				distances[j] = -1;
-		}
+			distances[j] = distance3d_sqr_pt_pt(&objs[j], &centers[0]);
 		distances[p1] = -1;
 		distances[p2] = -1;
 
@@ -211,10 +166,11 @@ kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw,
 			for (j = 0; j < n; j++)
 			{
 				/* empty objs and accepted clusters are already marked with distance = -1 */
-				if (distances[j] < 0) continue;
+				if (distances[j] < 0)
+					continue;
 
 				/* update minimal distance with previosuly accepted cluster */
-				current_distance = distance2d_sqr_pt_pt(objs[j], centers[i - 1]);
+				current_distance = distance3d_sqr_pt_pt(&objs[j], &centers[i - 1]);
 				if (current_distance < distances[j])
 					distances[j] = current_distance;
 
@@ -233,33 +189,17 @@ kmeans_init(POINT2D **objs, uint32_t n, POINT2D **centers, POINT2D *centers_raw,
 			distances[candidate_center] = -1;
 			/* Copy the point coordinates into the initial centers array
 			 * Centers array is an array of pointers to points, not an array of points */
-			centers_raw[i] = *((POINT2D *)objs[candidate_center]);
-			centers[i] = &(centers_raw[i]);
+			centers[i] = objs[candidate_center];
 		}
 		lwfree(distances);
 	}
 }
 
-int*
-lwgeom_cluster_2d_kmeans(const LWGEOM** geoms, uint32_t n, uint32_t k)
+int *
+lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 {
-	uint32_t i;
-	uint32_t num_centroids = 0;
 	uint32_t num_non_empty = 0;
-	LWGEOM** centroids;
-	POINT2D* centers_raw;
-	const POINT2D* cp;
-	int result = LW_FALSE;
-
-	/* An array of objects to be analyzed.
-	 * All NULL values will be returned in the KMEANS_NULL_CLUSTER. */
-	POINT2D** objs;
-
-	/* An array of centers for the algorithm. */
-	POINT2D** centers;
-
-	/* Array to fill in with cluster numbers. */
-	int* clusters;
+	uint8_t result = LW_FALSE;
 
 	assert(k > 0);
 	assert(n > 0);
@@ -267,79 +207,116 @@ lwgeom_cluster_2d_kmeans(const LWGEOM** geoms, uint32_t n, uint32_t k)
 
 	if (n < k)
 	{
-		lwerror("%s: number of geometries is less than the number of clusters requested, not all clusters will get data", __func__);
+		lwerror(
+		    "%s: number of geometries is less than the number of clusters requested, not all clusters will get data",
+		    __func__);
 		k = n;
 	}
 
-	/* We'll hold the temporary centroid objects here */
-	centroids = lwalloc(sizeof(LWGEOM*) * n);
-	memset(centroids, 0, sizeof(LWGEOM*) * n);
-
-	/* The vector of cluster means. We have to allocate a chunk of memory for these because we'll be mutating them
-	 * in the kmeans algorithm */
-	centers_raw = lwalloc(sizeof(POINT2D) * k);
-	memset(centers_raw, 0, sizeof(POINT2D) * k);
+	/* An array of objects to be analyzed. */
+	POINT3D *objs = lwalloc(sizeof(POINT3D) * n);
 
-	/* K-means configuration setup */
-	objs = lwalloc(sizeof(POINT2D*) * n);
-	clusters = lwalloc(sizeof(int) * n);
-	centers = lwalloc(sizeof(POINT2D*) * k);
+	/* Array to mark unclusterable objects. Will be returned as KMEANS_NULL_CLUSTER. */
+	uint8_t *geom_valid = lwalloc(sizeof(uint8_t) * n);
+	memset(geom_valid, 0, sizeof(uint8_t) * n);
 
-	/* Clean the memory */
-	memset(objs, 0, sizeof(POINT2D*) * n);
+	/* Array to fill in with cluster numbers. */
+	int *clusters = lwalloc(sizeof(int) * n);
 	memset(clusters, 0, sizeof(int) * n);
-	memset(centers, 0, sizeof(POINT2D*) * k);
+
+	/* An array of clusters centers for the algorithm. */
+	POINT3D *centers = lwalloc(sizeof(POINT3D) * k);
+	memset(centers, 0, sizeof(POINT3D) * k);
 
 	/* Prepare the list of object pointers for K-means */
-	for (i = 0; i < n; i++)
+	for (uint32_t i = 0; i < n; i++)
 	{
-		const LWGEOM* geom = geoms[i];
-		LWPOINT* lwpoint;
+		const LWGEOM *geom = geoms[i];
+		POINT3D out = {0, 0, 0};
 
-		/* Null/empty geometries get a NULL pointer, set earlier with memset */
-		if ((!geom) || lwgeom_is_empty(geom)) continue;
+		/* Null/empty geometries get geom_valid=LW_FALSE set earlier with memset */
+		if ((!geom) || lwgeom_is_empty(geom))
+			continue;
 
 		/* If the input is a point, use its coordinates */
-		/* If its not a point, convert it to one via centroid */
-		if (lwgeom_get_type(geom) != POINTTYPE)
+		if (lwgeom_get_type(geom) == POINTTYPE)
+		{
+			out.x = lwpoint_get_x(lwgeom_as_lwpoint(geom));
+			out.y = lwpoint_get_y(lwgeom_as_lwpoint(geom));
+			if (lwgeom_has_z(geom))
+				out.z = lwpoint_get_z(lwgeom_as_lwpoint(geom));
+		}
+		else if (!lwgeom_has_z(geom))
 		{
-			LWGEOM* centroid = lwgeom_centroid(geom);
-			if ((!centroid)) continue;
+			/* For 2D, we can take a centroid*/
+			LWGEOM *centroid = lwgeom_centroid(geom);
+			if (!centroid)
+				continue;
 			if (lwgeom_is_empty(centroid))
 			{
 				lwgeom_free(centroid);
 				continue;
 			}
-			centroids[num_centroids++] = centroid;
-			lwpoint = lwgeom_as_lwpoint(centroid);
+			out.x = lwpoint_get_x(lwgeom_as_lwpoint(centroid));
+			out.y = lwpoint_get_y(lwgeom_as_lwpoint(centroid));
+			lwgeom_free(centroid);
 		}
 		else
-			lwpoint = lwgeom_as_lwpoint(geom);
-
-		/* Store a pointer to the POINT2D we are interested in */
-		cp = getPoint2d_cp(lwpoint->point, 0);
-		objs[i] = (POINT2D*)cp;
-		num_non_empty++;
+		{
+			/* For 3D non-point, we can have a box center */
+			const GBOX *box = lwgeom_get_bbox(geom);
+			if (!gbox_is_valid(box))
+				continue;
+			out.x = (box->xmax + box->xmin) / 2;
+			out.y = (box->ymax + box->ymin) / 2;
+			out.z = (box->zmax + box->zmin) / 2;
+		}
+		geom_valid[i] = LW_TRUE;
+		objs[num_non_empty++] = out;
 	}
 
 	if (num_non_empty < k)
 	{
-		lwnotice("%s: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data", __func__);
+		lwnotice(
+		    "%s: number of non-empty geometries (%d) is less than the number of clusters (%d) requested, not all clusters will get data",
+		    __func__,
+		    num_non_empty,
+		    k);
 		k = num_non_empty;
 	}
 
 	if (k > 1)
 	{
-		kmeans_init(objs, n, centers, centers_raw, k);
-		result = kmeans(objs, clusters, n, centers, k);
+		int *clusters_dense = lwalloc(sizeof(int) * num_non_empty);
+		memset(clusters_dense, 0, sizeof(int) * num_non_empty);
+
+		kmeans_init(objs, num_non_empty, centers, k);
+		result = kmeans(objs, clusters_dense, num_non_empty, centers, k);
+
+		if (result)
+		{
+			uint32_t d = 0;
+			for (uint32_t i = 0; i < n; i++)
+				if (geom_valid[i])
+					clusters[i] = clusters_dense[d++];
+				else
+					clusters[i] = KMEANS_NULL_CLUSTER;
+		}
+		lwfree(clusters_dense);
+	}
+	else if (k == 0)
+	{
+		/* k=0: everything is unclusterable */
+		for (uint32_t i = 0; i < n; i++)
+			clusters[i] = KMEANS_NULL_CLUSTER;
+		result = LW_TRUE;
 	}
 	else
 	{
-		/* k=0: everything is unclusterable
-		 * k=1: mark up NULL and non-NULL */
-		for (i = 0; i < n; i++)
+		/* k=1: mark up NULL and non-NULL */
+		for (uint32_t i = 0; i < n; i++)
 		{
-			if (k == 0 || !objs[i])
+			if (!geom_valid[i])
 				clusters[i] = KMEANS_NULL_CLUSTER;
 			else
 				clusters[i] = 0;
@@ -350,11 +327,11 @@ lwgeom_cluster_2d_kmeans(const LWGEOM** geoms, uint32_t n, uint32_t k)
 	/* Before error handling, might as well clean up all the inputs */
 	lwfree(objs);
 	lwfree(centers);
-	lwfree(centers_raw);
-	lwfree(centroids);
+	lwfree(geom_valid);
 
 	/* Good result */
-	if (result) return clusters;
+	if (result)
+		return clusters;
 
 	/* Bad result, not going to need the answer */
 	lwfree(clusters);
diff --git a/postgis/lwgeom_window.c b/postgis/lwgeom_window.c
index fe36875..107590b 100644
--- a/postgis/lwgeom_window.c
+++ b/postgis/lwgeom_window.c
@@ -232,7 +232,7 @@ Datum ST_ClusterKMeans(PG_FUNCTION_ARGS)
 		}
 
 		/* Calculate k-means on the list! */
-		r = lwgeom_cluster_2d_kmeans((const LWGEOM **)geoms, N, k);
+		r = lwgeom_cluster_kmeans((const LWGEOM **)geoms, N, k);
 
 		/* Clean up */
 		for (i = 0; i < N; i++)
diff --git a/regress/core/cluster.sql b/regress/core/cluster.sql
index d8ed0a8..7358222 100644
--- a/regress/core/cluster.sql
+++ b/regress/core/cluster.sql
@@ -44,7 +44,7 @@ SELECT '#3612b', ST_ClusterDBSCAN(ST_Point(1,1), 20.1, 5) OVER();
 
 
 -- ST_ClusterKMeans
-select '#4100a', count(distinct result) from (SELECT ST_ClusterKMeans(foo1.the_geom, 3)OVER()  As result
+select '#4100a', count(distinct result) from (SELECT ST_ClusterKMeans(foo1.the_geom, 3) OVER()  As result
   FROM ((SELECT ST_Collect(geom)  As the_geom
 		FROM (VALUES ( ST_GeomFromEWKT('SRID=4326;MULTIPOLYGON(((-71.0821 42.3036 2,-71.0822 42.3036 2,-71.082 42.3038 2,-71.0819 42.3037 2,-71.0821 42.3036 2)))') ),
 	( ST_GeomFromEWKT('SRID=4326;POLYGON((-71.1261 42.2703 1,-71.1257 42.2703 1,-71.1257 42.2701 1,-71.126 42.2701 1,-71.1261 42.2702 1,-71.1261 42.2703 1))') ) ) As g(geom) CROSS JOIN generate_series(1,3) As i GROUP BY i )) As foo1 LIMIT 10) kmeans;
@@ -60,3 +60,15 @@ select '#4101a', count(distinct result) from (SELECT ST_ClusterKMeans(foo1.the_g
 			UNION ALL SELECT ST_GeomFromText('MULTILINESTRING EMPTY',4326) As the_geom ) ) As foo1 LIMIT 10) kmeans;
 
 select '#4101b', count(distinct cid) from (select ST_ClusterKMeans(geom,2) over () as cid from (values ('POINT EMPTY'::geometry), ('POINT EMPTY')) g(geom)) kmeans;
+
+select '3d_support-1', count(distinct cid) from (select ST_ClusterKMeans(geom,2) over () as cid from (values ('POINT(0 0 1)'::geometry), ('POINT(0 0 5)'), ('POINT(0 0 7)')) g(geom)) kmeans;
+select '3d_support-2', count(distinct cid) from (select ST_ClusterKMeans(geom,2) over () as cid from (values ('LINESTRING(0 0 1, 0 0 -1)'::geometry), ('POINT(0 0 5)'), ('POINT(0 0 7)')) g(geom)) kmeans;
+select '3d_support-3', count(distinct cid) from (select ST_ClusterKMeans(geom,2) over () as cid from (values ('LINESTRING(0 0, 0 0)'::geometry), ('POINT(0 0)'), ('POINT(0 0)')) g(geom)) kmeans;
+
+-- check that null and empty is handled in the clustering
+select '#4071', count(distinct a), count(distinct b), count(distinct c)  from
+(select
+	ST_ClusterKMeans(geom, 1) over () a,
+	ST_ClusterKMeans(geom, 2) over () b,
+	ST_ClusterKMeans(geom, 3) over () c
+from (values (null::geometry), ('POINT(1 1)'), ('POINT EMPTY'), ('POINT(0 0)'), ('POINT(4 4)')) as g (geom)) z;
diff --git a/regress/core/cluster_expected b/regress/core/cluster_expected
index cb7ac91..287125b 100644
--- a/regress/core/cluster_expected
+++ b/regress/core/cluster_expected
@@ -34,7 +34,12 @@ NOTICE:  kmeans_init: there are at least 3 duplicate inputs, number of output cl
 #4100a|1
 NOTICE:  kmeans_init: there are at least 2 duplicate inputs, number of output clusters may be less than you requested
 #4100b|1
-NOTICE:  lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
+NOTICE:  lwgeom_cluster_kmeans: number of non-empty geometries (0) is less than the number of clusters (3) requested, not all clusters will get data
 #4101a|1
-NOTICE:  lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
+NOTICE:  lwgeom_cluster_kmeans: number of non-empty geometries (0) is less than the number of clusters (2) requested, not all clusters will get data
 #4101b|1
+3d_support-1|2
+3d_support-2|2
+NOTICE:  kmeans_init: there are at least 3 duplicate inputs, number of output clusters may be less than you requested
+3d_support-3|1
+#4071|2|3|4
diff --git a/regress/core/lwgeom_regress.sql b/regress/core/lwgeom_regress.sql
index 988b81e..e95818d 100644
--- a/regress/core/lwgeom_regress.sql
+++ b/regress/core/lwgeom_regress.sql
@@ -219,14 +219,6 @@ group by cid
 order by count(*)
 limit 1;
 
--- check that null and empty is handled in the clustering
-select '#4071', count(distinct a), count(distinct b), count(distinct c)  from
-(select
-	ST_ClusterKMeans(geom, 1) over () a,
-	ST_ClusterKMeans(geom, 2) over () b,
-	ST_ClusterKMeans(geom, 3) over () c
-from (values (null::geometry), ('POINT(1 1)'), ('POINT EMPTY'), ('POINT(0 0)'), ('POINT(4 4)')) as g (geom)) z;
-
 -- typmod checks
 select 'typmod_point_4326', geometry_typmod_out(geometry_typmod_in('{Point,4326}'));
 select 'typmod_point_0', geometry_typmod_out(geometry_typmod_in('{Point,0}'));
diff --git a/regress/core/lwgeom_regress_expected b/regress/core/lwgeom_regress_expected
index dd88331..8dfd405 100644
--- a/regress/core/lwgeom_regress_expected
+++ b/regress/core/lwgeom_regress_expected
@@ -43,7 +43,6 @@ ERROR:  Empty geometry
 ST_Angle_2_lines|4.712389
 #3965|25|25
 #3971|t
-#4071|2|3|4
 typmod_point_4326|(Point,4326)
 typmod_point_0|(Point)
 NOTICE:  SRID value -1 converted to the officially unknown SRID value 0

-----------------------------------------------------------------------

Summary of changes:
 NEWS                                 |   2 +-
 doc/reference_cluster.xml            |  26 +++-
 liblwgeom/cunit/cu_algorithm.c       |   2 +-
 liblwgeom/liblwgeom.h.in             |   2 +-
 liblwgeom/lwinline.h                 |  14 +-
 liblwgeom/lwkmeans.c                 | 291 ++++++++++++++++-------------------
 postgis/lwgeom_window.c              |   2 +-
 regress/core/cluster.sql             |  14 +-
 regress/core/cluster_expected        |   9 +-
 regress/core/lwgeom_regress.sql      |   8 -
 regress/core/lwgeom_regress_expected |   1 -
 11 files changed, 194 insertions(+), 177 deletions(-)


hooks/post-receive
-- 
PostGIS


More information about the postgis-tickets mailing list