[postgis-tickets] r16605 - ST_ClusterKMeans: handle effective K=0 when all the inputs are EMPTY
Darafei
komzpa at gmail.com
Sun Jun 3 02:11:32 PDT 2018
Author: komzpa
Date: 2018-06-03 02:11:32 -0700 (Sun, 03 Jun 2018)
New Revision: 16605
Modified:
trunk/liblwgeom/lwkmeans.c
trunk/regress/cluster.sql
trunk/regress/cluster_expected
Log:
ST_ClusterKMeans: handle effective K=0 when all the inputs are EMPTY
Closes #4101
Closes https://github.com/postgis/postgis/pull/254
Modified: trunk/liblwgeom/lwkmeans.c
===================================================================
--- trunk/liblwgeom/lwkmeans.c 2018-06-02 12:09:35 UTC (rev 16604)
+++ trunk/liblwgeom/lwkmeans.c 2018-06-03 09:11:32 UTC (rev 16605)
@@ -135,21 +135,9 @@
double max_dst = -1;
double dst_p1, dst_p2;
- assert(k > 0);
+ /* k=0, k=1: "clustering" is just input validation */
+ assert(k > 1);
- /* k = 1: first non-null is ok, and input check guarantees there's one */
- if (k == 1)
- {
- for (i = 0; i < n; i++)
- {
- if (!objs[i]) continue;
- centers_raw[0] = *((POINT2D *)objs[i]);
- centers[0] = &(centers_raw[0]);
- return;
- }
- assert(0);
- }
-
/* k >= 2: find two distant points greedily */
for (i = 1; i < n; i++)
{
@@ -333,10 +321,25 @@
k = num_non_empty;
}
- kmeans_init(objs, clusters, n, centers, centers_raw, k);
+ if (k > 1)
+ {
+ kmeans_init(objs, clusters, n, centers, centers_raw, k);
+ result = kmeans(objs, clusters, n, centers, k);
+ }
+ else
+ {
+ /* k=0: everything is unclusterable
+ * k=1: mark up NULL and non-NULL */
+ for (i = 0; i < n; i++)
+ {
+ if (k == 0 || !objs[i])
+ clusters[i] = KMEANS_NULL_CLUSTER;
+ else
+ clusters[i] = 0;
+ }
+ result = LW_TRUE;
+ }
- result = kmeans(objs, clusters, n, centers, k);
-
/* Before error handling, might as well clean up all the inputs */
lwfree(objs);
lwfree(centers);
Modified: trunk/regress/cluster.sql
===================================================================
--- trunk/regress/cluster.sql 2018-06-02 12:09:35 UTC (rev 16604)
+++ trunk/regress/cluster.sql 2018-06-03 09:11:32 UTC (rev 16605)
@@ -50,3 +50,13 @@
( ST_GeomFromEWKT('SRID=4326;POLYGON((-71.1261 42.2703 1,-71.1257 42.2703 1,-71.1257 42.2701 1,-71.126 42.2701 1,-71.1261 42.2702 1,-71.1261 42.2703 1))') ) ) As g(geom) CROSS JOIN generate_series(1,3) As i GROUP BY i )) As foo1 LIMIT 10) kmeans;
select '#4100b', count(distinct cid) from (select ST_ClusterKMeans(geom,2) over () as cid from (values ('POINT(0 0)'::geometry), ('POINT(0 0)')) g(geom)) kmeans;
+
+
+select '#4101a', count(distinct result) from (SELECT ST_ClusterKMeans(foo1.the_geom, 3) OVER() As result
+ FROM ((SELECT ST_GeomFromText('POINT EMPTY',4326) As the_geom
+ UNION ALL SELECT ST_GeomFromText('MULTIPOINT EMPTY',4326) As the_geom
+ UNION ALL SELECT ST_GeomFromText('MULTIPOLYGON EMPTY',4326) As the_geom
+ UNION ALL SELECT ST_GeomFromText('LINESTRING EMPTY',4326) As the_geom
+ UNION ALL SELECT ST_GeomFromText('MULTILINESTRING EMPTY',4326) As the_geom ) ) As foo1 LIMIT 10) kmeans;
+
+select '#4101b', count(distinct cid) from (select ST_ClusterKMeans(geom,2) over () as cid from (values ('POINT EMPTY'::geometry), ('POINT EMPTY')) g(geom)) kmeans;
Modified: trunk/regress/cluster_expected
===================================================================
--- trunk/regress/cluster_expected 2018-06-02 12:09:35 UTC (rev 16604)
+++ trunk/regress/cluster_expected 2018-06-03 09:11:32 UTC (rev 16605)
@@ -34,3 +34,7 @@
#4100a|1
NOTICE: kmeans_init: there are at least 2 duplicate inputs, number of output clusters may be less than you requested
#4100b|1
+NOTICE: lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
+#4101a|1
+NOTICE: lwgeom_cluster_2d_kmeans: number of non-empty geometries is less than the number of clusters requested, not all clusters will get data
+#4101b|1
More information about the postgis-tickets
mailing list