[postgis-tickets] r14674 - #3362, ST_ClusterDBSCAN
Daniel Baston
dbaston at gmail.com
Wed Feb 24 03:21:30 PST 2016
Author: dbaston
Date: 2016-02-24 03:21:30 -0800 (Wed, 24 Feb 2016)
New Revision: 14674
Modified:
trunk/NEWS
trunk/liblwgeom/cunit/cu_unionfind.c
trunk/liblwgeom/lwgeom_geos.h
trunk/liblwgeom/lwgeom_geos_cluster.c
trunk/liblwgeom/lwunionfind.c
trunk/liblwgeom/lwunionfind.h
trunk/postgis/lwgeom_geos.c
trunk/postgis/lwgeom_window.c
trunk/postgis/postgis.sql.in
trunk/regress/cluster.sql
trunk/regress/cluster_expected
Log:
#3362, ST_ClusterDBSCAN
Modified: trunk/NEWS
===================================================================
--- trunk/NEWS 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/NEWS 2016-02-24 11:21:30 UTC (rev 14674)
@@ -11,6 +11,7 @@
- populate_topology_layer (Sandro Santilli)
- #2259 ST_Voronoi (Dan Baston)
- #3339 ST_GeneratePoints (Paul Ramsey)
+ - #3362 ST_ClusterDBSCAN (Dan Baston)
- #3428 ST_Points (Dan Baston)
- #3465 ST_KMeans (Paul Ramsey)
Modified: trunk/liblwgeom/cunit/cu_unionfind.c
===================================================================
--- trunk/liblwgeom/cunit/cu_unionfind.c 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/liblwgeom/cunit/cu_unionfind.c 2016-02-24 11:21:30 UTC (rev 14674)
@@ -110,6 +110,31 @@
UF_destroy(uf);
}
+static void test_unionfind_collapse_cluster_ids(void)
+{
+ UNIONFIND* uf = UF_create(10);
+
+ uf->clusters[0] = 8;
+ uf->clusters[1] = 5;
+ uf->clusters[2] = 5;
+ uf->clusters[3] = 5;
+ uf->clusters[4] = 7;
+ uf->clusters[5] = 5;
+ uf->clusters[6] = 8;
+ uf->clusters[7] = 7;
+ uf->clusters[8] = 8;
+ uf->clusters[9] = 7;
+
+ /* 5 -> 0
+ * 7 -> 1
+ * 8 -> 2
+ */
+ uint32_t expected_collapsed_ids[] = { 2, 0, 0, 0, 1, 0, 2, 1, 2, 1 };
+ uint32_t* collapsed_ids = UF_get_collapsed_cluster_ids(uf);
+
+ CU_ASSERT_EQUAL(0, memcmp(collapsed_ids, expected_collapsed_ids, 10*sizeof(uint32_t)));
+}
+
void unionfind_suite_setup(void);
void unionfind_suite_setup(void)
{
@@ -118,4 +143,5 @@
PG_ADD_TEST(suite, test_unionfind_union);
PG_ADD_TEST(suite, test_unionfind_ordered_by_cluster);
PG_ADD_TEST(suite, test_unionfind_path_compression);
+ PG_ADD_TEST(suite, test_unionfind_collapse_cluster_ids);
}
Modified: trunk/liblwgeom/lwgeom_geos.h
===================================================================
--- trunk/liblwgeom/lwgeom_geos.h 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/liblwgeom/lwgeom_geos.h 2016-02-24 11:21:30 UTC (rev 14674)
@@ -31,8 +31,8 @@
#endif
#include "liblwgeom.h"
+#include "lwunionfind.h"
-
/*
** Public prototypes for GEOS utility functions.
*/
@@ -43,6 +43,8 @@
int cluster_intersecting(GEOSGeometry** geoms, uint32_t num_geoms, GEOSGeometry*** clusterGeoms, uint32_t* num_clusters);
int cluster_within_distance(LWGEOM** geoms, uint32_t num_geoms, double tolerance, LWGEOM*** clusterGeoms, uint32_t* num_clusters);
+int cluster_dbscan(LWGEOM** geoms, uint32_t num_geoms, double eps, uint32_t min_points, LWGEOM*** clusterGeoms, uint32_t* num_clusters);
+int union_dbscan(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, uint32_t min_points);
POINTARRAY *ptarray_from_GEOSCoordSeq(const GEOSCoordSequence *cs, char want3d);
Modified: trunk/liblwgeom/lwgeom_geos_cluster.c
===================================================================
--- trunk/liblwgeom/lwgeom_geos_cluster.c 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/liblwgeom/lwgeom_geos_cluster.c 2016-02-24 11:21:30 UTC (rev 14674)
@@ -31,26 +31,14 @@
static const int STRTREE_NODE_CAPACITY = 10;
-/* Utility struct used to pass information to the GEOSSTRtree_query callback */
-struct UnionIfIntersectingContext
+/* Utility struct used to accumulate geometries in GEOSSTRtree_query callback */
+struct QueryContext
{
- UNIONFIND* uf;
- char error;
- uint32_t* p;
- const GEOSPreparedGeometry* prep;
- GEOSGeometry** geoms;
+ void** items_found;
+ uint32_t items_found_size;
+ uint32_t num_items_found;
};
-/* Utility struct used to pass information to the GEOSSTRtree_query callback */
-struct UnionIfDWithinContext
-{
- UNIONFIND* uf;
- char error;
- uint32_t* p;
- LWGEOM** geoms;
- double tolerance;
-};
-
/* Utility struct to keep GEOSSTRtree and associated structures to be freed after use */
struct STRTree
{
@@ -62,8 +50,6 @@
static struct STRTree make_strtree(void** geoms, uint32_t num_geoms, char is_lwgeom);
static void destroy_strtree(struct STRTree tree);
-static void union_if_intersecting(void* item, void* userdata);
-static void union_if_dwithin(void* item, void* userdata);
static int union_intersecting_pairs(GEOSGeometry** geoms, uint32_t num_geoms, UNIONFIND* uf);
static int combine_geometries(UNIONFIND* uf, void** geoms, uint32_t num_geoms, void*** clustersGeoms, uint32_t* num_clusters, char is_lwgeom);
@@ -91,16 +77,16 @@
}
else
{
- const GBOX* box = lwgeom_get_bbox(geoms[i]);
- if (box)
- {
- tree.envelopes[i] = GBOX2GEOS(box);
- }
- else
- {
- /* Empty geometry */
- tree.envelopes[i] = GEOSGeom_createEmptyPolygon();
- }
+ const GBOX* box = lwgeom_get_bbox(geoms[i]);
+ if (box)
+ {
+ tree.envelopes[i] = GBOX2GEOS(box);
+ }
+ else
+ {
+ /* Empty geometry */
+ tree.envelopes[i] = GEOSGeom_createEmptyPolygon();
+ }
}
GEOSSTRtree_insert(tree.tree, tree.envelopes[i], &(tree.geom_ids[i]));
}
@@ -122,184 +108,111 @@
lwfree(tree.envelopes);
}
-/* Callback function for GEOSSTRtree_query */
static void
-union_if_intersecting(void* item, void* userdata)
+query_accumulate(void* item, void* userdata)
{
- struct UnionIfIntersectingContext *cxt = userdata;
- if (cxt->error)
+ struct QueryContext *cxt = userdata;
+ if (!cxt->items_found)
{
- return;
+ cxt->items_found_size = 8;
+ cxt->items_found = lwalloc(cxt->items_found_size * sizeof(void*));
}
- uint32_t q = *((uint32_t*) item);
- uint32_t p = *(cxt->p);
- if (p != q && UF_find(cxt->uf, p) != UF_find(cxt->uf, q))
+ if (cxt->num_items_found >= cxt->items_found_size)
{
- int geos_type = GEOSGeomTypeId(cxt->geoms[p]);
- int geos_result;
-
- /* Don't build prepared a geometry around a Point or MultiPoint -
- * there are some problems in the implementation, and it's not clear
- * there would be a performance benefit in any case. (See #3433)
- */
- if (geos_type != GEOS_POINT && geos_type != GEOS_MULTIPOINT)
- {
- /* Lazy initialize prepared geometry */
- if (cxt->prep == NULL)
- {
- cxt->prep = GEOSPrepare(cxt->geoms[p]);
- }
- geos_result = GEOSPreparedIntersects(cxt->prep, cxt->geoms[q]);
- }
- else
- {
- geos_result = GEOSIntersects(cxt->geoms[p], cxt->geoms[q]);
- }
- if (geos_result > 1)
- {
- cxt->error = geos_result;
- return;
- }
- if (geos_result)
- {
- UF_union(cxt->uf, p, q);
- }
+ cxt->items_found_size = 2 * cxt->items_found_size;
+ cxt->items_found = lwrealloc(cxt->items_found, cxt->items_found_size * sizeof(void*));
}
+ cxt->items_found[cxt->num_items_found++] = item;
}
-/* Callback function for GEOSSTRtree_query */
-static void
-union_if_dwithin(void* item, void* userdata)
-{
- struct UnionIfDWithinContext *cxt = userdata;
- if (cxt->error)
- {
- return;
- }
- uint32_t q = *((uint32_t*) item);
- uint32_t p = *(cxt->p);
-
- if (p != q && UF_find(cxt->uf, p) != UF_find(cxt->uf, q))
- {
- double mindist = lwgeom_mindistance2d_tolerance(cxt->geoms[p], cxt->geoms[q], cxt->tolerance);
- if (mindist == FLT_MAX)
- {
- cxt->error = 1;
- return;
- }
-
- if (mindist <= cxt->tolerance)
- {
- UF_union(cxt->uf, p, q);
- }
- }
-}
-
/* Identify intersecting geometries and mark them as being in the same set */
static int
union_intersecting_pairs(GEOSGeometry** geoms, uint32_t num_geoms, UNIONFIND* uf)
{
- uint32_t i;
+ uint32_t p, i;
+ struct STRTree tree;
+ struct QueryContext cxt =
+ {
+ .items_found = NULL,
+ .num_items_found = 0,
+ .items_found_size = 0
+ };
+ int success = LW_SUCCESS;
if (num_geoms <= 1)
- {
return LW_SUCCESS;
- }
- struct STRTree tree = make_strtree((void**) geoms, num_geoms, 0);
+ tree = make_strtree((void**) geoms, num_geoms, 0);
if (tree.tree == NULL)
{
destroy_strtree(tree);
return LW_FAILURE;
}
- for (i = 0; i < num_geoms; i++)
+
+ for (p = 0; p < num_geoms; p++)
{
- if (GEOSisEmpty(geoms[i]))
- {
- continue;
- }
+ const GEOSPreparedGeometry* prep = NULL;
+ GEOSGeometry* query_envelope;
- struct UnionIfIntersectingContext cxt =
- {
- .uf = uf,
- .error = 0,
- .p = &i,
- .prep = NULL,
- .geoms = geoms
- };
- GEOSGeometry* query_envelope = GEOSEnvelope(geoms[i]);
- GEOSSTRtree_query(tree.tree, query_envelope, &union_if_intersecting, &cxt);
+ if (GEOSisEmpty(geoms[p]))
+ continue;
- GEOSGeom_destroy(query_envelope);
- GEOSPreparedGeom_destroy(cxt.prep);
- if (cxt.error)
- {
- return LW_FAILURE;
- }
- }
+ cxt.num_items_found = 0;
+ query_envelope = GEOSEnvelope(geoms[p]);
+ GEOSSTRtree_query(tree.tree, query_envelope, &query_accumulate, &cxt);
- destroy_strtree(tree);
- return LW_SUCCESS;
-}
-
-/* Identify geometries within a distance tolerance and mark them as being in the same set */
-static int
-union_pairs_within_distance(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double tolerance)
-{
- uint32_t i;
-
- if (num_geoms <= 1)
- {
- return LW_SUCCESS;
- }
-
- struct STRTree tree = make_strtree((void**) geoms, num_geoms, 1);
- if (tree.tree == NULL)
- {
- destroy_strtree(tree);
- return LW_FAILURE;
- }
-
- for (i = 0; i < num_geoms; i++)
- {
- struct UnionIfDWithinContext cxt =
+ for (i = 0; i < cxt.num_items_found; i++)
{
- .uf = uf,
- .error = 0,
- .p = &i,
- .geoms = geoms,
- .tolerance = tolerance
- };
+ uint32_t q = *((uint32_t*) cxt.items_found[i]);
- const GBOX* geom_extent = lwgeom_get_bbox(geoms[i]);
- if (!geom_extent)
- {
- /* Empty geometry */
- continue;
- }
- GBOX* query_extent = gbox_clone(geom_extent);
- gbox_expand(query_extent, tolerance);
- GEOSGeometry* query_envelope = GBOX2GEOS(query_extent);
+ if (p != q && UF_find(uf, p) != UF_find(uf, q))
+ {
+ int geos_type = GEOSGeomTypeId(geoms[p]);
+ int geos_result;
- if (!query_envelope)
- {
- destroy_strtree(tree);
- return LW_FAILURE;
+ /* Don't build prepared a geometry around a Point or MultiPoint -
+ * there are some problems in the implementation, and it's not clear
+ * there would be a performance benefit in any case. (See #3433)
+ */
+ if (geos_type != GEOS_POINT && geos_type != GEOS_MULTIPOINT)
+ {
+ /* Lazy initialize prepared geometry */
+ if (prep == NULL)
+ {
+ prep = GEOSPrepare(geoms[p]);
+ }
+ geos_result = GEOSPreparedIntersects(prep, geoms[q]);
+ }
+ else
+ {
+ geos_result = GEOSIntersects(geoms[p], geoms[q]);
+ }
+ if (geos_result > 1)
+ {
+ success = LW_FAILURE;
+ break;
+ }
+ else if (geos_result)
+ {
+ UF_union(uf, p, q);
+ }
+ }
}
- GEOSSTRtree_query(tree.tree, query_envelope, &union_if_dwithin, &cxt);
+ if (prep)
+ GEOSPreparedGeom_destroy(prep);
+ GEOSGeom_destroy(query_envelope);
- lwfree(query_extent);
- GEOSGeom_destroy(query_envelope);
- if (cxt.error)
- {
- return LW_FAILURE;
- }
+ if (!success)
+ break;
}
+ if (cxt.items_found)
+ lwfree(cxt.items_found);
+
destroy_strtree(tree);
- return LW_SUCCESS;
+ return success;
}
/** Takes an array of GEOSGeometry* and constructs an array of GEOSGeometry*, where each element in the constructed
@@ -328,10 +241,109 @@
int
cluster_within_distance(LWGEOM** geoms, uint32_t num_geoms, double tolerance, LWGEOM*** clusterGeoms, uint32_t* num_clusters)
{
+ int min_points = 1;
+ return cluster_dbscan(geoms, num_geoms, tolerance, min_points, clusterGeoms, num_clusters);
+}
+
+
+static int
+dbscan_update_context(GEOSSTRtree* tree, struct QueryContext* cxt, LWGEOM** geoms, uint32_t p, double eps)
+{
+ cxt->num_items_found = 0;
+
+ const GBOX* geom_extent = lwgeom_get_bbox(geoms[p]);
+ GBOX* query_extent = gbox_clone(geom_extent);
+ gbox_expand(query_extent, eps);
+ GEOSGeometry* query_envelope = GBOX2GEOS(query_extent);
+
+ if (!query_envelope)
+ return LW_FAILURE;
+
+ GEOSSTRtree_query(tree, query_envelope, &query_accumulate, cxt);
+
+ lwfree(query_extent);
+ GEOSGeom_destroy(query_envelope);
+
+ return LW_SUCCESS;
+}
+
+int
+union_dbscan(LWGEOM** geoms, uint32_t num_geoms, UNIONFIND* uf, double eps, uint32_t min_points)
+{
+ uint32_t p, i;
+ struct STRTree tree;
+ struct QueryContext cxt =
+ {
+ .items_found = NULL,
+ .num_items_found = 0,
+ .items_found_size = 0
+ };
+ int success = LW_SUCCESS;
+
+ if (num_geoms <= 1)
+ return LW_SUCCESS;
+
+ tree = make_strtree((void**) geoms, num_geoms, 1);
+ if (tree.tree == NULL)
+ {
+ destroy_strtree(tree);
+ return LW_FAILURE;
+ }
+
+ for (p = 0; p < num_geoms; p++)
+ {
+ uint32_t num_neighbors = 0;
+ uint32_t* neighbors;
+
+ if (lwgeom_is_empty(geoms[p]))
+ continue;
+
+ dbscan_update_context(tree.tree, &cxt, geoms, p, eps);
+ neighbors = lwalloc(cxt.num_items_found * sizeof(uint32_t*));
+
+ for (i = 0; i < cxt.num_items_found; i++)
+ {
+ uint32_t q = *((uint32_t*) cxt.items_found[i]);
+
+ double mindist = lwgeom_mindistance2d_tolerance(geoms[p], geoms[q], eps);
+ if (mindist == FLT_MAX)
+ {
+ success = LW_FAILURE;
+ break;
+ }
+
+ if (mindist <= eps)
+ neighbors[num_neighbors++] = q;
+ }
+
+ if (num_neighbors >= min_points)
+ {
+ for (i = 0; i < num_neighbors; i++)
+ {
+ UF_union(uf, p, neighbors[i]);
+ }
+ }
+
+ lwfree(neighbors);
+
+ if (!success)
+ break;
+ }
+
+ if (cxt.items_found)
+ lwfree(cxt.items_found);
+
+ destroy_strtree(tree);
+ return success;
+}
+
+int
+cluster_dbscan(LWGEOM** geoms, uint32_t num_geoms, double eps, uint32_t min_points, LWGEOM*** clusterGeoms, uint32_t* num_clusters)
+{
int cluster_success;
UNIONFIND* uf = UF_create(num_geoms);
- if (union_pairs_within_distance(geoms, num_geoms, uf, tolerance) == LW_FAILURE)
+ if (union_dbscan(geoms, num_geoms, uf, eps, min_points) == LW_FAILURE)
{
UF_destroy(uf);
return LW_FAILURE;
Modified: trunk/liblwgeom/lwunionfind.c
===================================================================
--- trunk/liblwgeom/lwunionfind.c 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/liblwgeom/lwunionfind.c 2016-02-24 11:21:30 UTC (rev 14674)
@@ -134,6 +134,33 @@
lwfree(cluster_id_ptr_by_elem_id);
return ordered_ids;
}
+
+uint32_t*
+UF_get_collapsed_cluster_ids(UNIONFIND* uf)
+{
+ uint32_t* ordered_components = UF_ordered_by_cluster(uf);
+ uint32_t* new_ids = lwalloc(uf->N * sizeof(uint32_t));
+ uint32_t last_old_id, current_new_id, i;
+
+ last_old_id = UF_find(uf, ordered_components[0]);
+ current_new_id = 0;
+ for (i = 0; i < uf->N; i++)
+ {
+ uint32_t j = ordered_components[i];
+ uint32_t current_old_id = UF_find(uf, j);
+
+ if (current_old_id != last_old_id)
+ current_new_id++;
+
+ new_ids[j] = current_new_id;
+ last_old_id = current_old_id;
+ }
+
+ lwfree(ordered_components);
+
+ return new_ids;
+}
+
static int
cmp_int(const void *a, const void *b)
{
Modified: trunk/liblwgeom/lwunionfind.h
===================================================================
--- trunk/liblwgeom/lwunionfind.h 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/liblwgeom/lwunionfind.h 2016-02-24 11:21:30 UTC (rev 14674)
@@ -52,4 +52,7 @@
* same cluster are contiguous in the array */
uint32_t* UF_ordered_by_cluster(UNIONFIND* uf);
+/* Replace the cluster ids in a UNIONFIND with sequential ids starting at zero. */
+uint32_t* UF_get_collapsed_cluster_ids(UNIONFIND* uf);
+
#endif
Modified: trunk/postgis/lwgeom_geos.c
===================================================================
--- trunk/postgis/lwgeom_geos.c 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/postgis/lwgeom_geos.c 2016-02-24 11:21:30 UTC (rev 14674)
@@ -3205,6 +3205,7 @@
LWGEOM** lw_inputs;
LWGEOM** lw_results;
double tolerance;
+ int min_points;
int srid=SRID_UNKNOWN;
/* Parameters used to construct a result array */
@@ -3213,13 +3214,31 @@
char elmalign;
/* Null array, null geometry (should be empty?) */
- if (PG_ARGISNULL(0))
- PG_RETURN_NULL();
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
- array = PG_GETARG_ARRAYTYPE_P(0);
+ array = PG_GETARG_ARRAYTYPE_P(0);
+
tolerance = PG_GETARG_FLOAT8(1);
- nelems = array_nelems_not_null(array);
+ if (tolerance < 0)
+ {
+ lwpgerror("Tolerance must be a positive number.");
+ PG_RETURN_NULL();
+ }
+ if (PG_NARGS() < 3)
+ min_points = 1;
+ else
+ min_points = PG_GETARG_INT32(2);
+
+ if (min_points < 1)
+ {
+ lwpgerror("min_points must be a positive number.");
+ PG_RETURN_NULL();
+ }
+
+ nelems = array_nelems_not_null(array);
+
POSTGIS_DEBUGF(3, "cluster_within_distance_garray: number of non-null elements: %d", nelems);
if ( nelems == 0 ) PG_RETURN_NULL();
@@ -3235,7 +3254,7 @@
PG_RETURN_NULL();
}
- if (cluster_within_distance(lw_inputs, nelems, tolerance, &lw_results, &nclusters) != LW_SUCCESS)
+ if (cluster_dbscan(lw_inputs, nelems, tolerance, min_points, &lw_results, &nclusters) != LW_SUCCESS)
{
elog(ERROR, "cluster_within: Error performing clustering");
PG_RETURN_NULL();
Modified: trunk/postgis/lwgeom_window.c
===================================================================
--- trunk/postgis/lwgeom_window.c 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/postgis/lwgeom_window.c 2016-02-24 11:21:30 UTC (rev 14674)
@@ -19,32 +19,146 @@
**********************************************************************
*
* Copyright 2016 Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2016 Daniel Baston <dbaston at gmail.com>
*
**********************************************************************/
+#include "../postgis_config.h"
+/* PostgreSQL */
#include "postgres.h"
-
-#include "fmgr.h"
#include "funcapi.h"
-#include "miscadmin.h"
-#include "utils/builtins.h"
#include "windowapi.h"
-#include "../postgis_config.h"
-
+/* PostGIS */
#include "liblwgeom.h"
+#include "lwunionfind.h" /* TODO move into liblwgeom.h ? */
+#include "lwgeom_geos.h"
+#include "lwgeom_log.h"
#include "lwgeom_pg.h"
+extern Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS);
extern Datum ST_KMeans(PG_FUNCTION_ARGS);
-typedef struct{
+typedef struct {
bool isdone;
bool isnull;
int result[1];
/* variable length */
} kmeans_context;
+typedef struct
+{
+ uint32_t cluster_id;
+ char is_null; /* NULL may result from a NULL geometry input, or it may be used by
+ algorithms such as DBSCAN that do not assign all inputs to a
+ cluster. */
+} dbscan_cluster_result;
+
+typedef struct
+{
+ char is_error;
+ dbscan_cluster_result cluster_assignments[1];
+} dbscan_context;
+
+static LWGEOM*
+read_lwgeom_from_partition(WindowObject win_obj, uint32_t i, bool* is_null)
+{
+ GSERIALIZED* g;
+ Datum arg = WinGetFuncArgInPartition(win_obj, 0, i, WINDOW_SEEK_HEAD, false, is_null, NULL);
+
+ if (*is_null) {
+ /* So that the indexes in our clustering input array can match our partition positions,
+ * * toss an empty point into the clustering inputs, as a pass-through.
+ * * NOTE: this will cause gaps in the output cluster id sequence.
+ * */
+ return lwpoint_as_lwgeom(lwpoint_construct_empty(0, 0, 0));
+ }
+
+ g = (GSERIALIZED*) PG_DETOAST_DATUM(arg);
+ return lwgeom_from_gserialized(g);
+}
+
+PG_FUNCTION_INFO_V1(ST_ClusterDBSCAN);
+Datum ST_ClusterDBSCAN(PG_FUNCTION_ARGS)
+{
+ WindowObject win_obj = PG_WINDOW_OBJECT();
+ uint32_t row = WinGetCurrentPosition(win_obj);
+ uint32_t ngeoms = WinGetPartitionRowCount(win_obj);
+ dbscan_context* context = WinGetPartitionLocalMemory(win_obj, sizeof(dbscan_context) + ngeoms * sizeof(dbscan_cluster_result));
+
+ if (row == 0) /* beginning of the partition; do all of the work now */
+ {
+ uint32_t i;
+ uint32_t* result_ids;
+ LWGEOM** geoms;
+ UNIONFIND* uf;
+ bool tolerance_is_null;
+ bool minpoints_is_null;
+ Datum tolerance_datum = WinGetFuncArgCurrent(win_obj, 1, &tolerance_is_null);
+ Datum minpoints_datum = WinGetFuncArgCurrent(win_obj, 2, &minpoints_is_null);
+ double tolerance = DatumGetFloat8(tolerance_datum);
+ int minpoints = DatumGetInt32(minpoints_datum);
+
+ context->is_error = LW_TRUE; /* until proven otherwise */
+
+ /* Validate input parameters */
+ if (tolerance_is_null || tolerance < 0)
+ {
+ lwpgerror("Tolerance must be a positive number", tolerance);
+ PG_RETURN_NULL();
+ }
+ if (minpoints_is_null || minpoints < 0)
+ {
+ lwpgerror("Minpoints must be a positive number", minpoints);
+ }
+
+ initGEOS(lwnotice, lwgeom_geos_error);
+ geoms = lwalloc(ngeoms * sizeof(LWGEOM*));
+ uf = UF_create(ngeoms);
+ for (i = 0; i < ngeoms; i++)
+ {
+ geoms[i] = read_lwgeom_from_partition(win_obj, i, &(context->cluster_assignments[i].is_null));
+
+ if (!geoms[i]) {
+ /* TODO release memory ? */
+ lwpgerror("Error reading geometry.");
+ PG_RETURN_NULL();
+ }
+ }
+
+ if (union_dbscan(geoms, ngeoms, uf, tolerance, minpoints) == LW_SUCCESS)
+ context->is_error = LW_FALSE;
+
+ for (i = 0; i < ngeoms; i++)
+ {
+ lwgeom_free(geoms[i]);
+ }
+ lwfree(geoms);
+
+ if (context->is_error)
+ {
+ UF_destroy(uf);
+ lwpgerror("Error during clustering");
+ PG_RETURN_NULL();
+ }
+
+ result_ids = UF_get_collapsed_cluster_ids(uf);
+ for (i = 0; i < ngeoms; i++)
+ {
+ context->cluster_assignments[i].cluster_id = result_ids[i];
+ }
+
+ lwfree(result_ids);
+ UF_destroy(uf);
+ }
+
+ if (context->cluster_assignments[row].is_null)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT32(context->cluster_assignments[row].cluster_id);
+}
+
PG_FUNCTION_INFO_V1(ST_KMeans);
Datum ST_KMeans(PG_FUNCTION_ARGS)
{
Modified: trunk/postgis/postgis.sql.in
===================================================================
--- trunk/postgis/postgis.sql.in 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/postgis/postgis.sql.in 2016-02-24 11:21:30 UTC (rev 14674)
@@ -1648,6 +1648,12 @@
AS 'MODULE_PATHNAME', 'cluster_within_distance_garray'
LANGUAGE 'c' IMMUTABLE STRICT;
+-- Availability: 2.3
+CREATE OR REPLACE FUNCTION ST_ClusterDBSCAN (geometry, eps float8, minpoints int)
+ RETURNS int
+ AS 'MODULE_PATHNAME', 'ST_ClusterDBSCAN'
+ LANGUAGE 'c' IMMUTABLE STRICT WINDOW;
+
-- Availability: 1.2.2
CREATE OR REPLACE FUNCTION ST_LineMerge(geometry)
RETURNS geometry
@@ -3678,6 +3684,12 @@
AS 'MODULE_PATHNAME'
LANGUAGE 'c';
+-- Availability: 2.3
+CREATE OR REPLACE FUNCTION pgis_geometry_accum_transfn(pgis_abs, geometry, float8, int)
+ RETURNS pgis_abs
+ AS 'MODULE_PATHNAME'
+ LANGUAGE 'c';
+
-- Availability: 1.4.0
CREATE OR REPLACE FUNCTION pgis_geometry_accum_finalfn(pgis_abs)
RETURNS geometry[]
Modified: trunk/regress/cluster.sql
===================================================================
--- trunk/regress/cluster.sql 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/regress/cluster.sql 2016-02-24 11:21:30 UTC (rev 14674)
@@ -14,3 +14,24 @@
SELECT 't2', ST_AsText(unnest(ST_ClusterIntersecting(ST_Accum(geom ORDER BY id)))) FROM cluster_inputs;
SELECT 't3', ST_AsText(unnest(ST_ClusterWithin(geom, 1.4 ORDER BY id))) FROM cluster_inputs;
SELECT 't4', ST_AsText(unnest(ST_ClusterWithin(ST_Accum(geom ORDER BY id), 1.5))) FROM cluster_inputs;
+
+-- tests for ST_DBSCAN
+
+CREATE TEMPORARY TABLE dbscan_inputs (id int, geom geometry);
+INSERT INTO dbscan_inputs VALUES
+(1, 'POINT (0 0)'),
+(2, 'POINT (0 1)'),
+(3, 'POINT (-0.5 0.5)'),
+(4, 'POINT (1 0)'),
+(5, 'POINT (1 1)'),
+(6, 'POINT (1.0 0.5)');
+
+/* minpoints = 1, equivalent to ST_ClusterWithin */
+SELECT 't101', id, ST_ClusterDBSCAN(geom, eps := 0.8, minpoints := 1) OVER () from dbscan_inputs;
+
+/* minpoints = 4, no clusters */
+SELECT 't102', id, ST_ClusterDBSCAN(geom, eps := 0.8, minpoints := 4) OVER () from dbscan_inputs;
+
+/* minpoints = 3, but eps too small to form cluster on left */
+SELECT 't103', id, ST_ClusterDBSCAN(geom, eps := 0.6, minpoints := 3) OVER () from dbscan_inputs;
+
Modified: trunk/regress/cluster_expected
===================================================================
--- trunk/regress/cluster_expected 2016-02-24 10:44:11 UTC (rev 14673)
+++ trunk/regress/cluster_expected 2016-02-24 11:21:30 UTC (rev 14674)
@@ -9,3 +9,21 @@
t3|GEOMETRYCOLLECTION(POLYGON EMPTY)
t4|GEOMETRYCOLLECTION(LINESTRING(0 0,1 1),LINESTRING(5 5,4 4),LINESTRING(0 0,-1 -1),LINESTRING(6 6,7 7),POLYGON((0 0,4 0,4 4,0 4,0 0)))
t4|GEOMETRYCOLLECTION(POLYGON EMPTY)
+t101|1|0
+t101|2|0
+t101|3|0
+t101|4|1
+t101|5|1
+t101|6|1
+t102|1|0
+t102|2|1
+t102|3|2
+t102|4|3
+t102|5|4
+t102|6|5
+t103|1|0
+t103|2|1
+t103|3|2
+t103|4|3
+t103|5|3
+t103|6|3
More information about the postgis-tickets
mailing list