[postgis-tickets] [SCM] PostGIS branch master updated. 3.1.0alpha3-3-g1ba9345

git at osgeo.org git at osgeo.org
Sun Nov 22 12:51:56 PST 2020


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".

The branch, master has been updated
       via  1ba9345694db6de6e648c10fe7dba2028e67a201 (commit)
      from  7ab01105ef45f2319c361174a27c6e7900069d32 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 1ba9345694db6de6e648c10fe7dba2028e67a201
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sun Nov 22 23:50:59 2020 +0300

    Weighted KMeans
    
    Adds weight to KMeans.
    
    Closes #4801

diff --git a/NEWS b/NEWS
index 1bbbc4a..2c0318d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,11 @@
+PostGIS 3.1.0alpha4
+2020/xx/xx
+Only tickets not included in 3.1.0alpha3
+
+ * New features*
+  - #4801, ST_ClusterKMeans supports weights in POINT[Z]M geometries (Darafei Praliaskouski)
+
+
 PostGIS 3.1.0alpha3
 2020/11/19
 Only tickets not included in 3.1.0alpha2
@@ -118,7 +126,6 @@ PostGIS 3.1.0alpha1
   - #4149, ST_Simplify(geom, 0) is now O(N).
            ST_Affine (ST_Translate, ST_TransScale, ST_Rotate) optimized.
            ST_SnapToGrid optimized. (Darafei Praliaskouski)
-  - #4574, Link Time Optimizations enabled (Darafei Praliaskouski)
   - #4578, Add parallellism and cost properties to brin functions (Raúl Marín)
   - #4473, Silence yacc warnings (Raúl Marín)
   - #4589, Disable C asserts when building without "--enable-debug" (Raúl Marín)
diff --git a/doc/reference_cluster.xml b/doc/reference_cluster.xml
index 1ae50b3..6d0a693 100644
--- a/doc/reference_cluster.xml
+++ b/doc/reference_cluster.xml
@@ -234,8 +234,9 @@ GEOMETRYCOLLECTION(LINESTRING(6 6,7 7))
         <ulink url="https://en.wikipedia.org/wiki/K-means_clustering">K-means</ulink>
         cluster number for each input geometry. The distance used for clustering is the
         distance between the centroids for 2D geometries, and distance between bounding box centers for 3D geometries.
+        For POINT inputs, M coordinate will be treated as weight of input and has to be larger than 0.
       </para>
-      <para>Enhanced: 3.1.0 Support for 3D geometries</para>
+      <para>Enhanced: 3.1.0 Support for 3D geometries and weights</para>
       <para>Availability: 2.3.0</para>
     </refsection>
 
diff --git a/liblwgeom/lwkmeans.c b/liblwgeom/lwkmeans.c
index bdb244a..c5c6a4e 100644
--- a/liblwgeom/lwkmeans.c
+++ b/liblwgeom/lwkmeans.c
@@ -19,23 +19,33 @@
  */
 #define KMEANS_MAX_ITERATIONS 1000
 
+inline static double
+distance3d_sqr_pt4d_pt4d(const POINT4D *p1, const POINT4D *p2)
+{
+	double hside = p2->x - p1->x;
+	double vside = p2->y - p1->y;
+	double zside = p2->z - p1->z;
+
+	return hside * hside + vside * vside + zside * zside;
+}
+
 static uint8_t
-update_r(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
+update_r(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
 {
 	uint8_t converged = LW_TRUE;
 
 	for (uint32_t i = 0; i < n; i++)
 	{
-		POINT3D obj = objs[i];
+		POINT4D obj = objs[i];
 
 		/* Initialize with distance to first cluster */
-		double curr_distance = distance3d_sqr_pt_pt(&obj, &centers[0]);
+		double curr_distance = distance3d_sqr_pt4d_pt4d(&obj, &centers[0]);
 		int curr_cluster = 0;
 
 		/* Check all other cluster centers and find the nearest */
-		for (uint32_t cluster = 1; cluster < k; cluster++)
+		for (int cluster = 1; cluster < k; cluster++)
 		{
-			double distance = distance3d_sqr_pt_pt(&obj, &centers[cluster]);
+			double distance = distance3d_sqr_pt4d_pt4d(&obj, &centers[cluster]);
 			if (distance < curr_distance)
 			{
 				curr_distance = distance;
@@ -44,44 +54,42 @@ update_r(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
 		}
 
 		/* Store the nearest cluster this object is in */
-		if (clusters[i] != (int)curr_cluster)
+		if (clusters[i] != curr_cluster)
 		{
 			converged = LW_FALSE;
-			clusters[i] = (int)curr_cluster;
+			clusters[i] = curr_cluster;
 		}
 	}
 	return converged;
 }
 
 static void
-update_means(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t *weights, uint32_t k)
+update_means(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
 {
-	memset(weights, 0, sizeof(uint32_t) * k);
-	memset(centers, 0, sizeof(POINT3D) * k);
+	memset(centers, 0, sizeof(POINT4D) * k);
 	for (uint32_t i = 0; i < n; i++)
 	{
 		int cluster = clusters[i];
-		centers[cluster].x += objs[i].x;
-		centers[cluster].y += objs[i].y;
-		centers[cluster].z += objs[i].z;
-		weights[cluster] += 1;
+		centers[cluster].x += objs[i].x * objs[i].m;
+		centers[cluster].y += objs[i].y * objs[i].m;
+		centers[cluster].z += objs[i].z * objs[i].m;
+		centers[cluster].m += objs[i].m;
 	}
 	for (uint32_t i = 0; i < k; i++)
 	{
-		if (weights[i])
+		if (centers[i].m)
 		{
-			centers[i].x /= weights[i];
-			centers[i].y /= weights[i];
-			centers[i].z /= weights[i];
+			centers[i].x /= centers[i].m;
+			centers[i].y /= centers[i].m;
+			centers[i].z /= centers[i].m;
 		}
 	}
 }
 
 static uint8_t
-kmeans(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
+kmeans(POINT4D *objs, int *clusters, uint32_t n, POINT4D *centers, uint32_t k)
 {
 	uint8_t converged = LW_FALSE;
-	uint32_t *weights = lwalloc(sizeof(uint32_t) * k);
 
 	for (uint32_t i = 0; i < KMEANS_MAX_ITERATIONS; i++)
 	{
@@ -89,16 +97,15 @@ kmeans(POINT3D *objs, int *clusters, uint32_t n, POINT3D *centers, uint32_t k)
 		converged = update_r(objs, clusters, n, centers, k);
 		if (converged)
 			break;
-		update_means(objs, clusters, n, centers, weights, k);
+		update_means(objs, clusters, n, centers, k);
 	}
-	lwfree(weights);
 	if (!converged)
 		lwerror("%s did not converge after %d iterations", __func__, KMEANS_MAX_ITERATIONS);
 	return converged;
 }
 
 static void
-kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
+kmeans_init(POINT4D *objs, uint32_t n, POINT4D *centers, uint32_t k)
 {
 	double *distances;
 	uint32_t p1 = 0, p2 = 0;
@@ -114,8 +121,8 @@ kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
 	for (i = 1; i < n; i++)
 	{
 		/* if we found a larger distance, replace our choice */
-		dst_p1 = distance3d_sqr_pt_pt(&objs[i], &objs[p1]);
-		dst_p2 = distance3d_sqr_pt_pt(&objs[i], &objs[p2]);
+		dst_p1 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p1]);
+		dst_p2 = distance3d_sqr_pt4d_pt4d(&objs[i], &objs[p2]);
 		if ((dst_p1 > max_dst) || (dst_p2 > max_dst))
 		{
 			if (dst_p1 > dst_p2)
@@ -152,7 +159,7 @@ kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
 
 		/* initialize array with distance to first object */
 		for (j = 0; j < n; j++)
-			distances[j] = distance3d_sqr_pt_pt(&objs[j], &centers[0]);
+			distances[j] = distance3d_sqr_pt4d_pt4d(&objs[j], &centers[0]);
 		distances[p1] = -1;
 		distances[p2] = -1;
 
@@ -170,7 +177,7 @@ kmeans_init(POINT3D *objs, uint32_t n, POINT3D *centers, uint32_t k)
 					continue;
 
 				/* update minimal distance with previosuly accepted cluster */
-				current_distance = distance3d_sqr_pt_pt(&objs[j], &centers[i - 1]);
+				current_distance = distance3d_sqr_pt4d_pt4d(&objs[j], &centers[i - 1]);
 				if (current_distance < distances[j])
 					distances[j] = current_distance;
 
@@ -199,7 +206,7 @@ int *
 lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 {
 	uint32_t num_non_empty = 0;
-	uint8_t result = LW_FALSE;
+	uint8_t converged = LW_FALSE;
 
 	assert(k > 0);
 	assert(n > 0);
@@ -214,7 +221,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 	}
 
 	/* An array of objects to be analyzed. */
-	POINT3D *objs = lwalloc(sizeof(POINT3D) * n);
+	POINT4D *objs = lwalloc(sizeof(POINT4D) * n);
 
 	/* Array to mark unclusterable objects. Will be returned as KMEANS_NULL_CLUSTER. */
 	uint8_t *geom_valid = lwalloc(sizeof(uint8_t) * n);
@@ -225,14 +232,15 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 	memset(clusters, 0, sizeof(int) * n);
 
 	/* An array of clusters centers for the algorithm. */
-	POINT3D *centers = lwalloc(sizeof(POINT3D) * k);
-	memset(centers, 0, sizeof(POINT3D) * k);
+	POINT4D *centers = lwalloc(sizeof(POINT4D) * k);
+	memset(centers, 0, sizeof(POINT4D) * k);
 
 	/* Prepare the list of object pointers for K-means */
 	for (uint32_t i = 0; i < n; i++)
 	{
 		const LWGEOM *geom = geoms[i];
-		POINT3D out = {0, 0, 0};
+		/* Unset M values will be 1 */
+		POINT4D out = {0, 0, 0, 1};
 
 		/* Null/empty geometries get geom_valid=LW_FALSE set earlier with memset */
 		if ((!geom) || lwgeom_is_empty(geom))
@@ -245,6 +253,13 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 			out.y = lwpoint_get_y(lwgeom_as_lwpoint(geom));
 			if (lwgeom_has_z(geom))
 				out.z = lwpoint_get_z(lwgeom_as_lwpoint(geom));
+			if (lwgeom_has_m(geom))
+			{
+				out.m = lwpoint_get_m(lwgeom_as_lwpoint(geom));
+				if (out.m <= 0)
+					lwerror("%s has an input point geometry with weight in M less or equal to 0",
+						__func__);
+			}
 		}
 		else if (!lwgeom_has_z(geom))
 		{
@@ -291,9 +306,9 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 		memset(clusters_dense, 0, sizeof(int) * num_non_empty);
 
 		kmeans_init(objs, num_non_empty, centers, k);
-		result = kmeans(objs, clusters_dense, num_non_empty, centers, k);
+		converged = kmeans(objs, clusters_dense, num_non_empty, centers, k);
 
-		if (result)
+		if (converged)
 		{
 			uint32_t d = 0;
 			for (uint32_t i = 0; i < n; i++)
@@ -309,7 +324,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 		/* k=0: everything is unclusterable */
 		for (uint32_t i = 0; i < n; i++)
 			clusters[i] = KMEANS_NULL_CLUSTER;
-		result = LW_TRUE;
+		converged = LW_TRUE;
 	}
 	else
 	{
@@ -321,7 +336,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 			else
 				clusters[i] = 0;
 		}
-		result = LW_TRUE;
+		converged = LW_TRUE;
 	}
 
 	/* Before error handling, might as well clean up all the inputs */
@@ -330,7 +345,7 @@ lwgeom_cluster_kmeans(const LWGEOM **geoms, uint32_t n, uint32_t k)
 	lwfree(geom_valid);
 
 	/* Good result */
-	if (result)
+	if (converged)
 		return clusters;
 
 	/* Bad result, not going to need the answer */

-----------------------------------------------------------------------

Summary of changes:
 NEWS                      |  9 ++++-
 doc/reference_cluster.xml |  3 +-
 liblwgeom/lwkmeans.c      | 89 +++++++++++++++++++++++++++--------------------
 3 files changed, 62 insertions(+), 39 deletions(-)


hooks/post-receive
-- 
PostGIS


More information about the postgis-tickets mailing list