[postgis-tickets] r15057 - #3612, Calling ST_ClusterDBSCAN with fewer than minpoints geometries in window frame crashes backend

Daniel Baston dbaston at gmail.com
Sun Sep 4 19:10:12 PDT 2016


Author: dbaston
Date: 2016-09-04 19:10:12 -0700 (Sun, 04 Sep 2016)
New Revision: 15057

Modified:
   trunk/liblwgeom/cunit/cu_geos_cluster.c
   trunk/liblwgeom/lwgeom_geos_cluster.c
   trunk/postgis/lwgeom_window.c
   trunk/regress/cluster.sql
   trunk/regress/cluster_expected
Log:
#3612, Calling ST_ClusterDBSCAN with fewer than minpoints geometries in window frame crashes backend

Modified: trunk/liblwgeom/cunit/cu_geos_cluster.c
===================================================================
--- trunk/liblwgeom/cunit/cu_geos_cluster.c	2016-09-04 16:02:12 UTC (rev 15056)
+++ trunk/liblwgeom/cunit/cu_geos_cluster.c	2016-09-05 02:10:12 UTC (rev 15057)
@@ -255,33 +255,35 @@
 	perform_cluster_intersecting_test(wkt_inputs_pt, 2, expected_outputs_pt, 1);
 }
 
-static void dbscan_test(void)
+struct dbscan_test_info {
+	double eps;
+	uint32_t min_points;
+	uint32_t num_geoms;
+	char** wkt_inputs;
+	uint32_t* expected_ids;
+	int* expected_in_cluster;
+};
+
+static void do_dbscan_test(struct dbscan_test_info test)
 {
-	char* wkt_inputs[] = { "POINT (0 0)", "POINT (-1 0)", "POINT (-1 -0.1)", "POINT (-1 0.1)",
-		                   "POINT (1 0)",
-						   "POINT (2 0)", "POINT (3  0)", "POINT ( 3 -0.1)", "POINT ( 3 0.1)" };
-	uint32_t num_geoms = sizeof(wkt_inputs) / sizeof(char*);
-	LWGEOM** geoms = WKTARRAY2LWGEOM(wkt_inputs, num_geoms);
-	UNIONFIND* uf = UF_create(num_geoms);
+	LWGEOM** geoms = WKTARRAY2LWGEOM(test.wkt_inputs, test.num_geoms);
+	UNIONFIND* uf = UF_create(test.num_geoms);
 	uint32_t* ids;
 	char* in_a_cluster;
 	uint32_t i;
 
-	/* Although POINT (1 0) and POINT (2 0) are within eps distance of each other,
-	 * they do not connect the two clusters because POINT (1 0) is not a core point.
-	 * See #3572
-	 */
-	double eps = 1.01;
-	uint32_t min_points = 5;
-	uint32_t expected_ids[] = { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 };
-
-	union_dbscan(geoms, num_geoms, uf, eps, min_points, &in_a_cluster);
+	union_dbscan(geoms, test.num_geoms, uf, test.eps, test.min_points, &in_a_cluster);
 	ids = UF_get_collapsed_cluster_ids(uf, in_a_cluster);
 
-	ASSERT_INTARRAY_EQUAL(ids, expected_ids, num_geoms);
+	for (i = 0; i < test.num_geoms; i++)
+	{
+		ASSERT_INT_EQUAL(in_a_cluster[i], test.expected_in_cluster[i]);
+		if (in_a_cluster[i])
+			ASSERT_INT_EQUAL(ids[i], test.expected_ids[i]);
+	}
 
 	UF_destroy(uf);
-	for (i = 0; i < num_geoms; i++)
+	for (i = 0; i < test.num_geoms; i++)
 	{
 		lwgeom_free(geoms[i]);
 	}
@@ -290,6 +292,79 @@
 	lwfree(ids);
 }
 
+static void dbscan_test(void)
+{
+	struct dbscan_test_info test;
+	char* wkt_inputs[] = { "POINT (0 0)", "POINT (-1 0)", "POINT (-1 -0.1)", "POINT (-1 0.1)",
+		                   "POINT (1 0)",
+						   "POINT (2 0)", "POINT (3  0)", "POINT ( 3 -0.1)", "POINT ( 3 0.1)" };
+	/* Although POINT (1 0) and POINT (2 0) are within eps distance of each other,
+	 * they do not connect the two clusters because POINT (1 0) is not a core point.
+	 * See #3572
+	 */
+	test.eps = 1.01;
+	test.min_points = 5;
+	uint32_t expected_ids[] =   { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 };
+	int expected_in_cluster[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+	test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+	test.expected_ids = expected_ids;
+	test.wkt_inputs = wkt_inputs;
+	test.expected_in_cluster = expected_in_cluster;
+	do_dbscan_test(test);
+}
+
+static void dbscan_test_3612a(void)
+{
+	struct dbscan_test_info test;
+	char* wkt_inputs[] = { "POINT (1 1)" };
+
+	test.eps = 0.0;
+	test.min_points = 5;
+	uint32_t expected_ids[] = { rand() };
+	int expected_in_cluster[] = { 0 };
+	test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+	test.expected_ids = expected_ids;
+	test.expected_in_cluster = expected_in_cluster;
+	test.wkt_inputs = wkt_inputs;
+	do_dbscan_test(test);
+}
+
+static void dbscan_test_3612b(void)
+{
+	struct dbscan_test_info test;
+	char* wkt_inputs[] = { "POINT (1 1)" };
+
+	test.eps = 0.0;
+	test.min_points = 1;
+	uint32_t expected_ids[]   = { 0 };
+	int expected_in_cluster[] = { 1 };
+	test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+	test.expected_ids = expected_ids;
+	test.expected_in_cluster = expected_in_cluster;
+	test.wkt_inputs = wkt_inputs;
+	do_dbscan_test(test);
+}
+
+static void dbscan_test_3612c(void)
+{
+	struct dbscan_test_info test;
+	char* wkt_inputs[] = { "POLYGONM((-71.1319 42.2503 1,-71.132 42.2502 3,-71.1323 42.2504 -2,-71.1322 42.2505 1,-71.1319 42.2503 0))",
+						   "POLYGONM((-71.1319 42.2512 0,-71.1318 42.2511 20,-71.1317 42.2511 -20,-71.1317 42.251 5,-71.1317 42.2509 4,-71.132 42.2511 6,-71.1319 42.2512 30))" }; 
+	test.eps = 20.1;
+	test.min_points = 5;
+	uint32_t expected_ids[]   = { rand(), rand() };
+	int expected_in_cluster[] = { 0, 0 };
+	test.num_geoms = sizeof(wkt_inputs) / sizeof(char*);
+
+	test.expected_ids = expected_ids;
+	test.expected_in_cluster = expected_in_cluster;
+	test.wkt_inputs = wkt_inputs;
+	do_dbscan_test(test);
+}
+
 void geos_cluster_suite_setup(void);
 void geos_cluster_suite_setup(void)
 {
@@ -301,4 +376,7 @@
 	PG_ADD_TEST(suite, empty_inputs_test);
 	PG_ADD_TEST(suite, multipoint_test);
 	PG_ADD_TEST(suite, dbscan_test);
+	PG_ADD_TEST(suite, dbscan_test_3612a);
+	PG_ADD_TEST(suite, dbscan_test_3612b);
+	PG_ADD_TEST(suite, dbscan_test_3612c);
 }

Modified: trunk/liblwgeom/lwgeom_geos_cluster.c
===================================================================
--- trunk/liblwgeom/lwgeom_geos_cluster.c	2016-09-04 16:02:12 UTC (rev 15056)
+++ trunk/liblwgeom/lwgeom_geos_cluster.c	2016-09-05 02:10:12 UTC (rev 15057)
@@ -323,6 +323,14 @@
 	};
 	int success = LW_SUCCESS;
 
+	if (in_a_cluster_ret)
+	{
+		char* in_a_cluster = lwalloc(num_geoms * sizeof(char));
+		for (i = 0; i < num_geoms; i++)
+			in_a_cluster[i] = LW_TRUE;
+		*in_a_cluster_ret = in_a_cluster;
+	}
+
 	if (num_geoms <= 1)
 		return LW_SUCCESS;
 
@@ -358,14 +366,6 @@
 		}
 	}
 
-	if (in_a_cluster_ret)
-	{
-		char* in_a_cluster = lwalloc(num_geoms * sizeof(char));
-		for (i = 0; i < num_geoms; i++)
-			in_a_cluster[i] = LW_TRUE;
-		*in_a_cluster_ret = in_a_cluster;
-	}
-
 	if (cxt.items_found)
 		lwfree(cxt.items_found);
 
@@ -390,9 +390,19 @@
 	char* in_a_cluster;
 	char* is_in_core;
 
+	in_a_cluster = lwalloc(num_geoms * sizeof(char));
+	memset(in_a_cluster, 0, num_geoms * sizeof(char));
+
+	if (in_a_cluster_ret)
+		*in_a_cluster_ret = in_a_cluster;
+
 	/* Bail if we don't even have enough inputs to make a cluster. */
 	if (num_geoms <= min_points)
+	{
+		if (!in_a_cluster_ret)
+			lwfree(in_a_cluster);
 		return LW_SUCCESS;
+	}
 
 	tree = make_strtree((void**) geoms, num_geoms, LW_TRUE);
 	if (tree.tree == NULL)
@@ -401,8 +411,6 @@
 		return LW_FAILURE;
 	}
 
-	in_a_cluster = lwalloc(num_geoms * sizeof(char));
-	memset(in_a_cluster, 0, num_geoms * sizeof(char));
 	is_in_core = lwalloc(num_geoms * sizeof(char));
 	memset(is_in_core, 0, num_geoms * sizeof(char));
 	neighbors = lwalloc(min_points * sizeof(uint32_t));
@@ -488,10 +496,8 @@
 	lwfree(neighbors);
 	lwfree(is_in_core);
 
-	/* Either pass in_a_cluster to our caller, or free it. */
-	if (in_a_cluster_ret)
-		*in_a_cluster_ret = in_a_cluster;
-	else
+	/* Free in_a_cluster if we're not giving it to our caller */
+	if (!in_a_cluster_ret)
 		lwfree(in_a_cluster);
 
 	if (cxt.items_found)

Modified: trunk/postgis/lwgeom_window.c
===================================================================
--- trunk/postgis/lwgeom_window.c	2016-09-04 16:02:12 UTC (rev 15056)
+++ trunk/postgis/lwgeom_window.c	2016-09-05 02:10:12 UTC (rev 15057)
@@ -92,7 +92,7 @@
 		uint32_t i;
 		uint32_t* result_ids;
 		LWGEOM** geoms;
-		char* is_in_cluster;
+		char* is_in_cluster = NULL;
 		UNIONFIND* uf;
 		bool tolerance_is_null;
 		bool minpoints_is_null;
@@ -128,7 +128,7 @@
 			}
 		}
 
-		if (union_dbscan(geoms, ngeoms, uf, tolerance, minpoints, &is_in_cluster) == LW_SUCCESS)
+		if (union_dbscan(geoms, ngeoms, uf, tolerance, minpoints, minpoints > 1 ? &is_in_cluster : NULL) == LW_SUCCESS)
 			context->is_error = LW_FALSE;
 
 		for (i = 0; i < ngeoms; i++)
@@ -140,7 +140,8 @@
 		if (context->is_error)
 		{
 			UF_destroy(uf);
-			lwfree(is_in_cluster);
+			if (is_in_cluster)
+				lwfree(is_in_cluster);
 			lwpgerror("Error during clustering");
 			PG_RETURN_NULL();
 		}
@@ -148,7 +149,7 @@
 		result_ids = UF_get_collapsed_cluster_ids(uf, is_in_cluster);
 		for (i = 0; i < ngeoms; i++)
 		{
-			if (!is_in_cluster[i])
+			if (minpoints > 1 && !is_in_cluster[i])
 			{
 				context->cluster_assignments[i].is_null = LW_TRUE;
 			}

Modified: trunk/regress/cluster.sql
===================================================================
--- trunk/regress/cluster.sql	2016-09-04 16:02:12 UTC (rev 15056)
+++ trunk/regress/cluster.sql	2016-09-05 02:10:12 UTC (rev 15057)
@@ -35,3 +35,9 @@
 /* minpoints = 3, but eps too small to form cluster on left */
 SELECT 't103', id, ST_ClusterDBSCAN(geom, eps := 0.6, minpoints := 3) OVER () from dbscan_inputs;
 
+-- #3612
+SELECT 't3612a', ST_ClusterDBSCAN(foo1.the_geom, 20.1, 5)OVER()  As result
+							FROM ((SELECT geom  As the_geom
+									FROM (VALUES ( ST_GeomFromEWKT('SRID=4326;POLYGONM((-71.1319 42.2503 1,-71.132 42.2502 3,-71.1323 42.2504 -2,-71.1322 42.2505 1,-71.1319 42.2503 0))') ),
+											( ST_GeomFromEWKT('SRID=4326;POLYGONM((-71.1319 42.2512 0,-71.1318 42.2511 20,-71.1317 42.2511 -20,-71.1317 42.251 5,-71.1317 42.2509 4,-71.132 42.2511 6,-71.1319 42.2512 30))') ) ) As g(geom))) As foo1 LIMIT 3;
+SELECT 't3612b', ST_ClusterDBSCAN( ST_Point(1,1), 20.1, 5) OVER();

Modified: trunk/regress/cluster_expected
===================================================================
--- trunk/regress/cluster_expected	2016-09-04 16:02:12 UTC (rev 15056)
+++ trunk/regress/cluster_expected	2016-09-05 02:10:12 UTC (rev 15057)
@@ -27,3 +27,6 @@
 t103|4|0
 t103|5|0
 t103|6|0
+t3612a|
+t3612a|
+t3612b|



More information about the postgis-tickets mailing list