[postgis-tickets] r16222 - Disable negative values in weighted geometric median.

Darafei komzpa at gmail.com
Fri Jan 5 07:22:57 PST 2018


Author: komzpa
Date: 2018-01-05 07:22:57 -0800 (Fri, 05 Jan 2018)
New Revision: 16222

Modified:
   trunk/liblwgeom/cunit/cu_algorithm.c
   trunk/liblwgeom/cunit/cu_tester.c
   trunk/liblwgeom/lwgeom_geos.c
   trunk/liblwgeom/lwgeom_median.c
Log:
Disable negative values in weighted geometric median.

Reviewed by Dan Baston

Closes https://github.com/postgis/postgis/pull/176



Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c	2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/cunit/cu_algorithm.c	2018-01-05 15:22:57 UTC (rev 16222)
@@ -3,6 +3,7 @@
  * PostGIS - Spatial Types for PostgreSQL
  * http://postgis.net
  * Copyright 2008 Paul Ramsey
+ * Copyright 2018 Darafei Praliaskouski, me at komzpa.net
  *
  * This is free software; you can redistribute and/or modify it under
  * the terms of the GNU General Public Licence. See the COPYING file.
@@ -1116,6 +1117,7 @@
 
 static void do_median_test(char* input, char* expected, int fail_if_not_converged, int iter_count)
 {
+	cu_error_msg_reset();
 	LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE);
 	LWPOINT* expected_result = NULL;
 	POINT4D actual_pt;
@@ -1147,7 +1149,7 @@
 			passed = passed && (!lwgeom_has_m((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.m, expected_pt.m));
 		}
 		if (!passed)
-			printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
+			printf("median_test input %s (parsed %s) expected %s got %s\n", input, lwgeom_to_ewkt(g), lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
 	}
 	else if (result == NULL && expected == NULL) /* got nothing, expecting nothing */
 	{
@@ -1156,12 +1158,13 @@
 	else if (result != NULL && expected == NULL) /* got something, expecting nothing */
 	{
 		passed = LW_FALSE;
-		printf("median_test expected NULL got %s\n", lwgeom_to_ewkt((LWGEOM*) result));
+		printf("median_test input %s (parsed %s) expected NULL got %s\n", input, lwgeom_to_ewkt(g), lwgeom_to_ewkt((LWGEOM*) result));
 	}
 	else if (result == NULL && expected != NULL) /* got nothing, expecting something */
 	{
 		passed = LW_FALSE;
-		printf("median_test expected %s got NULL\n", lwgeom_to_ewkt((LWGEOM*) expected_result));
+		printf("%s", cu_error_msg);
+		printf("median_test input %s (parsed %s) expected %s got NULL\n", input, lwgeom_to_ewkt(g), lwgeom_to_ewkt((LWGEOM*) expected_result));
 	}
 
 	CU_ASSERT_TRUE(passed);
@@ -1199,21 +1202,26 @@
 	/* Empty input */
 	do_median_test("MULTIPOINT EMPTY", "POINT EMPTY", LW_FALSE, 1000);
 	do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY", LW_FALSE, 1000);
-	do_median_test("MULTIPOINT EMPTY", NULL, LW_TRUE, 1000);
-	do_median_test("MULTIPOINT (EMPTY)", NULL, LW_TRUE, 1000);
+	do_median_test("MULTIPOINT EMPTY", "POINT EMPTY", LW_TRUE, 1000);
+	do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY", LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM (1 -1 3 1, 1 0 2 7, 2 1 1 1, EMPTY)", "POINT (1 0 2)", LW_TRUE, 1000);
 
 	/* Weighted input */
-	do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 1))", "POINT (1 0 2)", LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM (1 -1 3 1, 1 0 2 7, 2 1 1 1)", "POINT (1 0 2)", LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM (-1 1 -3 1, -1 0 -2 7, -2 -1 -1 1)", "POINT (-1 0 -2)", LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM (-1 1 -3 1, -1 0 -2 7, -2 -1 -1 0.5, -2 -1 -1 0.5)", "POINT (-1 0 -2)", LW_TRUE, 1000);
+
 	/* Point that is replaced by two half-weighted */
-	do_median_test("MULTIPOINT ((0 -1 0 1), (0 0 0 1), (0 1 0 0.5), (0 1 0 0.5))", "POINT (0 0 0)", LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM ((0 -1 0 1), (0 0 0 1), (0 1 0 0.5), (0 1 0 0.5))", "POINT (0 0 0)", LW_TRUE, 1000);
 	/* Point is doubled and then erased by negative weight */
-	do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", "POINT (1 0 2)", LW_TRUE, 1000);
-	/* Weightless input is empty */
-	do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", "POINT EMPTY", LW_FALSE, 1000);
-	do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", NULL, LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", NULL, LW_FALSE, 1000);
+	/* Weightless input won't converge */
+	do_median_test("MULTIPOINT ZM ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_FALSE, 1000);
+	do_median_test("MULTIPOINT ZM ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_TRUE, 1000);
 	/* Negative weight won't converge */
-	do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", "POINT EMPTY", LW_FALSE, 1000);
-	do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_TRUE, 1000);
+	do_median_test("MULTIPOINT ZM ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_FALSE, 1000);
+	do_median_test("MULTIPOINT ZM ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_TRUE, 1000);
 
 	/* Bind convergence too tightly */
 	do_median_test("MULTIPOINT ((0 0), (1 1), (0 1), (2 2))", "POINT(0.75 1.0)", LW_FALSE, 0);

Modified: trunk/liblwgeom/cunit/cu_tester.c
===================================================================
--- trunk/liblwgeom/cunit/cu_tester.c	2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/cunit/cu_tester.c	2018-01-05 15:22:57 UTC (rev 16222)
@@ -140,7 +140,7 @@
 	char *suite_name;
 	CU_pSuite suite_to_run;
 	char *test_name;
-	CU_pTest test_to_run;
+	CU_pTest test_to_run = NULL;
 	CU_ErrorCode errCode = 0;
 	CU_pTestRegistry registry;
 	int num_run;
@@ -206,7 +206,7 @@
 			}
 			else
 			{
-				if (test_name != NULL)
+				if (test_name != NULL && test_to_run != NULL)
 				{
 					/* Run only this test. */
 					printf("\nRunning test '%s' in suite '%s'.\n", test_name, suite_name);

Modified: trunk/liblwgeom/lwgeom_geos.c
===================================================================
--- trunk/liblwgeom/lwgeom_geos.c	2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/lwgeom_geos.c	2018-01-05 15:22:57 UTC (rev 16222)
@@ -356,9 +356,6 @@
 	GEOSCoordSeq sq;
 	GEOSGeom g, shell;
 	GEOSGeom *geoms = NULL;
-	/*
-	LWGEOM *tmp;
-	*/
 	uint32_t ngeoms, i, j;
 	int geostype;
 #if LWDEBUG_LEVEL >= 4
@@ -375,13 +372,13 @@
 		return g;
 	}
 
+	LWPOINT *lwp = NULL;
+	LWPOLY *lwpoly = NULL;
+	LWLINE *lwl = NULL;
+	LWCOLLECTION *lwc = NULL;
+
 	switch (lwgeom->type)
 	{
-		LWPOINT *lwp = NULL;
-		LWPOLY *lwpoly = NULL;
-		LWLINE *lwl = NULL;
-		LWCOLLECTION *lwc = NULL;
-
 	case POINTTYPE:
 		lwp = (LWPOINT *)lwgeom;
 

Modified: trunk/liblwgeom/lwgeom_median.c
===================================================================
--- trunk/liblwgeom/lwgeom_median.c	2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/lwgeom_median.c	2018-01-05 15:22:57 UTC (rev 16222)
@@ -23,15 +23,16 @@
  *
  **********************************************************************/
 
+#include <assert.h>
 #include <float.h>
 
 #include "liblwgeom_internal.h"
 #include "lwgeom_log.h"
 
 static void
-calc_weighted_distances_3d(const POINT3D* curr, const POINT4D* points, size_t npoints, double* distances)
+calc_weighted_distances_3d(const POINT3D* curr, const POINT4D* points, uint32_t npoints, double* distances)
 {
-	size_t i;
+	uint32_t i;
 	for (i = 0; i < npoints; i++)
 	{
 		distances[i] = distance3d_pt_pt(curr, (POINT3D*)&points[i]) / points[i].m;
@@ -39,20 +40,20 @@
 }
 
 static double
-iterate_4d(POINT4D* curr, const POINT4D* points, size_t npoints, double* distances)
+iterate_4d(POINT3D* curr, const POINT4D* points, uint32_t npoints, double* distances)
 {
-	size_t i;
-	POINT4D next = { 0, 0, 0, 1};
-	double delta;
+	uint32_t i;
+	POINT3D next = { 0, 0, 0 };
+	double delta = 0;
 	double denom = 0;
 	char hit = LW_FALSE;
 
-	calc_weighted_distances_3d((POINT3D*)curr, points, npoints, distances);
+	calc_weighted_distances_3d(curr, points, npoints, distances);
 
 	for (i = 0; i < npoints; i++)
 	{
 		/* we need to use lower epsilon than in FP_IS_ZERO in the loop for calculation to converge */
-		if (fabs(distances[i]) > DBL_EPSILON)
+		if (distances[i] > DBL_EPSILON)
 		{
 			next.x += points[i].x / distances[i];
 			next.y += points[i].y / distances[i];
@@ -64,122 +65,138 @@
 			hit = LW_TRUE;
 		}
 	}
-	if (fabs(denom) > DBL_EPSILON)
+	/* negative weight shouldn't get here */
+	assert(denom >= 0);
+
+	/* denom is zero in case of multipoint of single point when we've converged perfectly */
+	if (denom > DBL_EPSILON)
 	{
 		next.x /= denom;
 		next.y /= denom;
 		next.z /= denom;
-	}
-	else
-	{
-		hit = LW_TRUE;
-	}
 
-	/* If any of the intermediate points in the calculation is found in the
-	 * set of input points, the standard Weiszfeld method gets stuck with a
-	 * divide-by-zero.
-	 *
-	 * To get ourselves out of the hole, we follow an alternate procedure to
-	 * get the next iteration, as described in:
-	 *
-	 * Vardi, Y. and Zhang, C. (2011) "A modified Weiszfeld algorithm for the
-	 * Fermat-Weber location problem."  Math. Program., Ser. A 90: 559-566.
-	 * DOI 10.1007/s101070100222
-	 *
-	 * Available online at the time of this writing at
-	 * http://www.stat.rutgers.edu/home/cunhui/papers/43.pdf
-	 */
-	if (hit)
-	{
-		double dx = 0;
-		double dy = 0;
-		double dz = 0;
-		double r_inv;
-		for (i = 0; i < npoints; i++)
+		/* If any of the intermediate points in the calculation is found in the
+	 	* set of input points, the standard Weiszfeld method gets stuck with a
+	 	* divide-by-zero.
+	 	*
+	 	* To get ourselves out of the hole, we follow an alternate procedure to
+	 	* get the next iteration, as described in:
+	 	*
+	 	* Vardi, Y. and Zhang, C. (2011) "A modified Weiszfeld algorithm for the
+	 	* Fermat-Weber location problem."  Math. Program., Ser. A 90: 559-566.
+	 	* DOI 10.1007/s101070100222
+	 	*
+	 	* Available online at the time of this writing at
+	 	* http://www.stat.rutgers.edu/home/cunhui/papers/43.pdf
+	 	*/
+		if (hit)
 		{
-			if (fabs(distances[i]) > DBL_EPSILON)
+			double dx = 0;
+			double dy = 0;
+			double dz = 0;
+			double r_inv;
+			for (i = 0; i < npoints; i++)
 			{
-				dx += (points[i].x - curr->x) / distances[i];
-				dy += (points[i].y - curr->y) / distances[i];
-				dz += (points[i].y - curr->z) / distances[i];
+				if (distances[i] > DBL_EPSILON)
+				{
+					dx += (points[i].x - curr->x) / distances[i];
+					dy += (points[i].y - curr->y) / distances[i];
+					dz += (points[i].y - curr->z) / distances[i];
+				}
 			}
+
+			r_inv = 1.0 / sqrt ( dx*dx + dy*dy + dz*dz );
+
+			next.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x;
+			next.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y;
+			next.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z;
 		}
 
-		r_inv = 1.0 / sqrt ( dx*dx + dy*dy + dz*dz );
+		delta = distance3d_pt_pt(curr, &next);
 
-		next.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x;
-		next.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y;
-		next.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z;
+		curr->x = next.x;
+		curr->y = next.y;
+		curr->z = next.z;
 	}
 
-	delta = distance3d_pt_pt((POINT3D*)curr, (POINT3D*)&next);
-
-	curr->x = next.x;
-	curr->y = next.y;
-	curr->z = next.z;
-
 	return delta;
 }
 
-static POINT4D
-init_guess(const POINT4D* points, size_t npoints)
+static POINT3D
+init_guess(const POINT4D* points, uint32_t npoints)
 {
-	POINT4D guess = { 0, 0, 0, 0 };
-	size_t i;
+	assert(npoints > 0);
+	POINT3D guess = { 0, 0, 0 };
+	double mass = 0;
+	uint32_t i;
 	for (i = 0; i < npoints; i++)
 	{
 		guess.x += points[i].x * points[i].m;
 		guess.y += points[i].y * points[i].m;
 		guess.z += points[i].z * points[i].m;
-		guess.m += points[i].m;
+		mass += points[i].m;
 	}
-	if (!FP_IS_ZERO(guess.m))
-	{
-		guess.x /= guess.m;
-		guess.y /= guess.m;
-		guess.z /= guess.m;
-	}
+	guess.x /= mass;
+	guess.y /= mass;
+	guess.z /= mass;
 	return guess;
 }
 
 static POINT4D*
-lwmpoint_extract_points_4d(const LWMPOINT* g, size_t* ngeoms)
+lwmpoint_extract_points_4d(const LWMPOINT* g, uint32_t* npoints, int* input_empty)
 {
-	size_t i;
-	size_t n = 0;
-	int is_3d = lwgeom_has_z((LWGEOM*) g);
+	uint32_t i;
+	uint32_t n = 0;
+	POINT4D* points = lwalloc(g->ngeoms * sizeof(POINT4D));
 	int has_m = lwgeom_has_m((LWGEOM*) g);
 
-	POINT4D* points = lwalloc(g->ngeoms * sizeof(POINT4D));
 	for (i = 0; i < g->ngeoms; i++)
 	{
 		LWGEOM* subg = lwcollection_getsubgeom((LWCOLLECTION*) g, i);
 		if (!lwgeom_is_empty(subg))
 		{
-			getPoint4d_p(((LWPOINT*) subg)->point, 0, (POINT4D*) &points[n]);
-			if (!is_3d)
+			*input_empty = LW_FALSE;
+			if (!getPoint4d_p(((LWPOINT*) subg)->point, 0, &points[n]))
 			{
-				points[n].z = 0.0; /* in case the getPoint functions return NaN in the future for 2d */
+				lwerror("Geometric median: getPoint4d_p reported failure on point (POINT(%g %g %g %g), number %d of %d in input).", points[n].x, points[n].y, points[n].z, points[n].m, i, g->ngeoms);
+				lwfree(points);
+				return NULL;
 			}
-			if (!has_m)
+			if (has_m)
 			{
-				points[n].m = 1.0;
-				n++;
+				/* This limitation on non-negativity can be lifted
+				 * by replacing Weiszfeld algorithm with different one.
+				 * Possible option described in:
+				 *
+				 * Drezner, Zvi & O. Wesolowsky, George. (1991).
+				 * The Weber Problem On The Plane With Some Negative Weights.
+				 * INFOR. Information Systems and Operational Research.
+				 * 29. 10.1080/03155986.1991.11732158.
+				 */
+				if (points[n].m < 0)
+				{
+					lwerror("Geometric median input contains points with negative weights (POINT(%g %g %g %g), number %d of %d in input). Implementation can't guarantee global minimum convergence.", points[n].x, points[n].y, points[n].z, points[n].m, i, g->ngeoms);
+					lwfree(points);
+					return NULL;
+				}
+
+				/* points with zero weight are not going to affect calculation, drop them early */
+				if (points[n].m > DBL_EPSILON) n++;
 			}
 			else
 			{
-				/* points with zero weight are not going to affect calculation, drop them early */
-				if (!FP_IS_ZERO(points[n].m))
-				{
-					n++;
-				}
+				points[n].m = 1.0;
+				n++;
 			}
 		}
 	}
 
-	if (ngeoms != NULL)
-		*ngeoms = n;
+#if PARANOIA_LEVEL > 0
+	/* check Z=0 for 2D inputs*/
+	if (!lwgeom_has_z((LWGEOM*) g)) for (i = 0; i < n; i++) assert(points[i].z == 0);
+#endif
 
+	*npoints = n;
 	return points;
 }
 
@@ -187,44 +204,34 @@
 LWPOINT*
 lwmpoint_median(const LWMPOINT* g, double tol, uint32_t max_iter, char fail_if_not_converged)
 {
-	size_t npoints; /* we need to count this ourselves so we can exclude empties and weightless points */
-	size_t i;
+	/* m ordinate is considered weight, if defined */
+	uint32_t npoints = 0; /* we need to count this ourselves so we can exclude empties and weightless points */
+	uint32_t i;
+	int input_empty = LW_TRUE;
 	double delta = DBL_MAX;
-	double* distances;
-	POINT4D* points = lwmpoint_extract_points_4d(g, &npoints); /* m ordinate is considered weight, if defined */
-	POINT4D median;
+	POINT3D median;
+	double* distances = NULL;
+	POINT4D* points = lwmpoint_extract_points_4d(g, &npoints, &input_empty);
 
+	/* input validation failed, error reported already */
+	if (points == NULL) return NULL;
+
 	if (npoints == 0)
 	{
 		lwfree(points);
-		if (fail_if_not_converged)
+		if (input_empty)
 		{
-			lwerror("Median failed to find input points with weight.");
-			return NULL;
+			return lwpoint_construct_empty(g->srid, 0, 0);
 		}
 		else
 		{
-			return lwpoint_construct_empty(g->srid, 0, 0);
+			lwerror("Median failed to find non-empty input points with positive weight.");
+			return NULL;
 		}
 	}
 
 	median = init_guess(points, npoints);
 
-	/* negative total weight means median cannot converge */
-	if (median.m <= 0.0)
-	{
-		lwfree(points);
-		if (fail_if_not_converged)
-		{
-			lwerror("Median cannot converge for input with negative sum of weights.");
-			return NULL;
-		}
-		else
-		{
-			return lwpoint_construct_empty(g->srid, 0, 0);
-		}
-	}
-
 	distances = lwalloc(npoints * sizeof(double));
 
 	for (i = 0; i < max_iter && delta > tol; i++)
@@ -232,8 +239,8 @@
 		delta = iterate_4d(&median, points, npoints, distances);
 	}
 
+	lwfree(distances);
 	lwfree(points);
-	lwfree(distances);
 
 	if (fail_if_not_converged && delta > tol)
 	{



More information about the postgis-tickets mailing list