[postgis-tickets] r16222 - Disable negative values in weighted geometric median.
Darafei
komzpa at gmail.com
Fri Jan 5 07:22:57 PST 2018
Author: komzpa
Date: 2018-01-05 07:22:57 -0800 (Fri, 05 Jan 2018)
New Revision: 16222
Modified:
trunk/liblwgeom/cunit/cu_algorithm.c
trunk/liblwgeom/cunit/cu_tester.c
trunk/liblwgeom/lwgeom_geos.c
trunk/liblwgeom/lwgeom_median.c
Log:
Disable negative values in weighted geometric median.
Reviewed by Dan Baston
Closes https://github.com/postgis/postgis/pull/176
Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c 2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/cunit/cu_algorithm.c 2018-01-05 15:22:57 UTC (rev 16222)
@@ -3,6 +3,7 @@
* PostGIS - Spatial Types for PostgreSQL
* http://postgis.net
* Copyright 2008 Paul Ramsey
+ * Copyright 2018 Darafei Praliaskouski, me at komzpa.net
*
* This is free software; you can redistribute and/or modify it under
* the terms of the GNU General Public Licence. See the COPYING file.
@@ -1116,6 +1117,7 @@
static void do_median_test(char* input, char* expected, int fail_if_not_converged, int iter_count)
{
+ cu_error_msg_reset();
LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE);
LWPOINT* expected_result = NULL;
POINT4D actual_pt;
@@ -1147,7 +1149,7 @@
passed = passed && (!lwgeom_has_m((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.m, expected_pt.m));
}
if (!passed)
- printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
+ printf("median_test input %s (parsed %s) expected %s got %s\n", input, lwgeom_to_ewkt(g), lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
}
else if (result == NULL && expected == NULL) /* got nothing, expecting nothing */
{
@@ -1156,12 +1158,13 @@
else if (result != NULL && expected == NULL) /* got something, expecting nothing */
{
passed = LW_FALSE;
- printf("median_test expected NULL got %s\n", lwgeom_to_ewkt((LWGEOM*) result));
+ printf("median_test input %s (parsed %s) expected NULL got %s\n", input, lwgeom_to_ewkt(g), lwgeom_to_ewkt((LWGEOM*) result));
}
else if (result == NULL && expected != NULL) /* got nothing, expecting something */
{
passed = LW_FALSE;
- printf("median_test expected %s got NULL\n", lwgeom_to_ewkt((LWGEOM*) expected_result));
+ printf("%s", cu_error_msg);
+ printf("median_test input %s (parsed %s) expected %s got NULL\n", input, lwgeom_to_ewkt(g), lwgeom_to_ewkt((LWGEOM*) expected_result));
}
CU_ASSERT_TRUE(passed);
@@ -1199,21 +1202,26 @@
/* Empty input */
do_median_test("MULTIPOINT EMPTY", "POINT EMPTY", LW_FALSE, 1000);
do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY", LW_FALSE, 1000);
- do_median_test("MULTIPOINT EMPTY", NULL, LW_TRUE, 1000);
- do_median_test("MULTIPOINT (EMPTY)", NULL, LW_TRUE, 1000);
+ do_median_test("MULTIPOINT EMPTY", "POINT EMPTY", LW_TRUE, 1000);
+ do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY", LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM (1 -1 3 1, 1 0 2 7, 2 1 1 1, EMPTY)", "POINT (1 0 2)", LW_TRUE, 1000);
/* Weighted input */
- do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 1))", "POINT (1 0 2)", LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM (1 -1 3 1, 1 0 2 7, 2 1 1 1)", "POINT (1 0 2)", LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM (-1 1 -3 1, -1 0 -2 7, -2 -1 -1 1)", "POINT (-1 0 -2)", LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM (-1 1 -3 1, -1 0 -2 7, -2 -1 -1 0.5, -2 -1 -1 0.5)", "POINT (-1 0 -2)", LW_TRUE, 1000);
+
/* Point that is replaced by two half-weighted */
- do_median_test("MULTIPOINT ((0 -1 0 1), (0 0 0 1), (0 1 0 0.5), (0 1 0 0.5))", "POINT (0 0 0)", LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM ((0 -1 0 1), (0 0 0 1), (0 1 0 0.5), (0 1 0 0.5))", "POINT (0 0 0)", LW_TRUE, 1000);
/* Point is doubled and then erased by negative weight */
- do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", "POINT (1 0 2)", LW_TRUE, 1000);
- /* Weightless input is empty */
- do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", "POINT EMPTY", LW_FALSE, 1000);
- do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", NULL, LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", NULL, LW_FALSE, 1000);
+ /* Weightless input won't converge */
+ do_median_test("MULTIPOINT ZM ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_FALSE, 1000);
+ do_median_test("MULTIPOINT ZM ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_TRUE, 1000);
/* Negative weight won't converge */
- do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", "POINT EMPTY", LW_FALSE, 1000);
- do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ZM ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_FALSE, 1000);
+ do_median_test("MULTIPOINT ZM ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_TRUE, 1000);
/* Bind convergence too tightly */
do_median_test("MULTIPOINT ((0 0), (1 1), (0 1), (2 2))", "POINT(0.75 1.0)", LW_FALSE, 0);
Modified: trunk/liblwgeom/cunit/cu_tester.c
===================================================================
--- trunk/liblwgeom/cunit/cu_tester.c 2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/cunit/cu_tester.c 2018-01-05 15:22:57 UTC (rev 16222)
@@ -140,7 +140,7 @@
char *suite_name;
CU_pSuite suite_to_run;
char *test_name;
- CU_pTest test_to_run;
+ CU_pTest test_to_run = NULL;
CU_ErrorCode errCode = 0;
CU_pTestRegistry registry;
int num_run;
@@ -206,7 +206,7 @@
}
else
{
- if (test_name != NULL)
+ if (test_name != NULL && test_to_run != NULL)
{
/* Run only this test. */
printf("\nRunning test '%s' in suite '%s'.\n", test_name, suite_name);
Modified: trunk/liblwgeom/lwgeom_geos.c
===================================================================
--- trunk/liblwgeom/lwgeom_geos.c 2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/lwgeom_geos.c 2018-01-05 15:22:57 UTC (rev 16222)
@@ -356,9 +356,6 @@
GEOSCoordSeq sq;
GEOSGeom g, shell;
GEOSGeom *geoms = NULL;
- /*
- LWGEOM *tmp;
- */
uint32_t ngeoms, i, j;
int geostype;
#if LWDEBUG_LEVEL >= 4
@@ -375,13 +372,13 @@
return g;
}
+ LWPOINT *lwp = NULL;
+ LWPOLY *lwpoly = NULL;
+ LWLINE *lwl = NULL;
+ LWCOLLECTION *lwc = NULL;
+
switch (lwgeom->type)
{
- LWPOINT *lwp = NULL;
- LWPOLY *lwpoly = NULL;
- LWLINE *lwl = NULL;
- LWCOLLECTION *lwc = NULL;
-
case POINTTYPE:
lwp = (LWPOINT *)lwgeom;
Modified: trunk/liblwgeom/lwgeom_median.c
===================================================================
--- trunk/liblwgeom/lwgeom_median.c 2018-01-05 13:16:12 UTC (rev 16221)
+++ trunk/liblwgeom/lwgeom_median.c 2018-01-05 15:22:57 UTC (rev 16222)
@@ -23,15 +23,16 @@
*
**********************************************************************/
+#include <assert.h>
#include <float.h>
#include "liblwgeom_internal.h"
#include "lwgeom_log.h"
static void
-calc_weighted_distances_3d(const POINT3D* curr, const POINT4D* points, size_t npoints, double* distances)
+calc_weighted_distances_3d(const POINT3D* curr, const POINT4D* points, uint32_t npoints, double* distances)
{
- size_t i;
+ uint32_t i;
for (i = 0; i < npoints; i++)
{
distances[i] = distance3d_pt_pt(curr, (POINT3D*)&points[i]) / points[i].m;
@@ -39,20 +40,20 @@
}
static double
-iterate_4d(POINT4D* curr, const POINT4D* points, size_t npoints, double* distances)
+iterate_4d(POINT3D* curr, const POINT4D* points, uint32_t npoints, double* distances)
{
- size_t i;
- POINT4D next = { 0, 0, 0, 1};
- double delta;
+ uint32_t i;
+ POINT3D next = { 0, 0, 0 };
+ double delta = 0;
double denom = 0;
char hit = LW_FALSE;
- calc_weighted_distances_3d((POINT3D*)curr, points, npoints, distances);
+ calc_weighted_distances_3d(curr, points, npoints, distances);
for (i = 0; i < npoints; i++)
{
/* we need to use lower epsilon than in FP_IS_ZERO in the loop for calculation to converge */
- if (fabs(distances[i]) > DBL_EPSILON)
+ if (distances[i] > DBL_EPSILON)
{
next.x += points[i].x / distances[i];
next.y += points[i].y / distances[i];
@@ -64,122 +65,138 @@
hit = LW_TRUE;
}
}
- if (fabs(denom) > DBL_EPSILON)
+ /* negative weight shouldn't get here */
+ assert(denom >= 0);
+
+ /* denom is zero in case of multipoint of single point when we've converged perfectly */
+ if (denom > DBL_EPSILON)
{
next.x /= denom;
next.y /= denom;
next.z /= denom;
- }
- else
- {
- hit = LW_TRUE;
- }
- /* If any of the intermediate points in the calculation is found in the
- * set of input points, the standard Weiszfeld method gets stuck with a
- * divide-by-zero.
- *
- * To get ourselves out of the hole, we follow an alternate procedure to
- * get the next iteration, as described in:
- *
- * Vardi, Y. and Zhang, C. (2011) "A modified Weiszfeld algorithm for the
- * Fermat-Weber location problem." Math. Program., Ser. A 90: 559-566.
- * DOI 10.1007/s101070100222
- *
- * Available online at the time of this writing at
- * http://www.stat.rutgers.edu/home/cunhui/papers/43.pdf
- */
- if (hit)
- {
- double dx = 0;
- double dy = 0;
- double dz = 0;
- double r_inv;
- for (i = 0; i < npoints; i++)
+ /* If any of the intermediate points in the calculation is found in the
+ * set of input points, the standard Weiszfeld method gets stuck with a
+ * divide-by-zero.
+ *
+ * To get ourselves out of the hole, we follow an alternate procedure to
+ * get the next iteration, as described in:
+ *
+ * Vardi, Y. and Zhang, C. (2011) "A modified Weiszfeld algorithm for the
+ * Fermat-Weber location problem." Math. Program., Ser. A 90: 559-566.
+ * DOI 10.1007/s101070100222
+ *
+ * Available online at the time of this writing at
+ * http://www.stat.rutgers.edu/home/cunhui/papers/43.pdf
+ */
+ if (hit)
{
- if (fabs(distances[i]) > DBL_EPSILON)
+ double dx = 0;
+ double dy = 0;
+ double dz = 0;
+ double r_inv;
+ for (i = 0; i < npoints; i++)
{
- dx += (points[i].x - curr->x) / distances[i];
- dy += (points[i].y - curr->y) / distances[i];
- dz += (points[i].y - curr->z) / distances[i];
+ if (distances[i] > DBL_EPSILON)
+ {
+ dx += (points[i].x - curr->x) / distances[i];
+ dy += (points[i].y - curr->y) / distances[i];
+ dz += (points[i].y - curr->z) / distances[i];
+ }
}
+
+ r_inv = 1.0 / sqrt ( dx*dx + dy*dy + dz*dz );
+
+ next.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x;
+ next.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y;
+ next.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z;
}
- r_inv = 1.0 / sqrt ( dx*dx + dy*dy + dz*dz );
+ delta = distance3d_pt_pt(curr, &next);
- next.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x;
- next.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y;
- next.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z;
+ curr->x = next.x;
+ curr->y = next.y;
+ curr->z = next.z;
}
- delta = distance3d_pt_pt((POINT3D*)curr, (POINT3D*)&next);
-
- curr->x = next.x;
- curr->y = next.y;
- curr->z = next.z;
-
return delta;
}
-static POINT4D
-init_guess(const POINT4D* points, size_t npoints)
+static POINT3D
+init_guess(const POINT4D* points, uint32_t npoints)
{
- POINT4D guess = { 0, 0, 0, 0 };
- size_t i;
+ assert(npoints > 0);
+ POINT3D guess = { 0, 0, 0 };
+ double mass = 0;
+ uint32_t i;
for (i = 0; i < npoints; i++)
{
guess.x += points[i].x * points[i].m;
guess.y += points[i].y * points[i].m;
guess.z += points[i].z * points[i].m;
- guess.m += points[i].m;
+ mass += points[i].m;
}
- if (!FP_IS_ZERO(guess.m))
- {
- guess.x /= guess.m;
- guess.y /= guess.m;
- guess.z /= guess.m;
- }
+ guess.x /= mass;
+ guess.y /= mass;
+ guess.z /= mass;
return guess;
}
static POINT4D*
-lwmpoint_extract_points_4d(const LWMPOINT* g, size_t* ngeoms)
+lwmpoint_extract_points_4d(const LWMPOINT* g, uint32_t* npoints, int* input_empty)
{
- size_t i;
- size_t n = 0;
- int is_3d = lwgeom_has_z((LWGEOM*) g);
+ uint32_t i;
+ uint32_t n = 0;
+ POINT4D* points = lwalloc(g->ngeoms * sizeof(POINT4D));
int has_m = lwgeom_has_m((LWGEOM*) g);
- POINT4D* points = lwalloc(g->ngeoms * sizeof(POINT4D));
for (i = 0; i < g->ngeoms; i++)
{
LWGEOM* subg = lwcollection_getsubgeom((LWCOLLECTION*) g, i);
if (!lwgeom_is_empty(subg))
{
- getPoint4d_p(((LWPOINT*) subg)->point, 0, (POINT4D*) &points[n]);
- if (!is_3d)
+ *input_empty = LW_FALSE;
+ if (!getPoint4d_p(((LWPOINT*) subg)->point, 0, &points[n]))
{
- points[n].z = 0.0; /* in case the getPoint functions return NaN in the future for 2d */
+ lwerror("Geometric median: getPoint4d_p reported failure on point (POINT(%g %g %g %g), number %d of %d in input).", points[n].x, points[n].y, points[n].z, points[n].m, i, g->ngeoms);
+ lwfree(points);
+ return NULL;
}
- if (!has_m)
+ if (has_m)
{
- points[n].m = 1.0;
- n++;
+ /* This limitation on non-negativity can be lifted
+ * by replacing Weiszfeld algorithm with different one.
+ * Possible option described in:
+ *
+ * Drezner, Zvi & O. Wesolowsky, George. (1991).
+ * The Weber Problem On The Plane With Some Negative Weights.
+ * INFOR. Information Systems and Operational Research.
+ * 29. 10.1080/03155986.1991.11732158.
+ */
+ if (points[n].m < 0)
+ {
+ lwerror("Geometric median input contains points with negative weights (POINT(%g %g %g %g), number %d of %d in input). Implementation can't guarantee global minimum convergence.", points[n].x, points[n].y, points[n].z, points[n].m, i, g->ngeoms);
+ lwfree(points);
+ return NULL;
+ }
+
+ /* points with zero weight are not going to affect calculation, drop them early */
+ if (points[n].m > DBL_EPSILON) n++;
}
else
{
- /* points with zero weight are not going to affect calculation, drop them early */
- if (!FP_IS_ZERO(points[n].m))
- {
- n++;
- }
+ points[n].m = 1.0;
+ n++;
}
}
}
- if (ngeoms != NULL)
- *ngeoms = n;
+#if PARANOIA_LEVEL > 0
+ /* check Z=0 for 2D inputs*/
+ if (!lwgeom_has_z((LWGEOM*) g)) for (i = 0; i < n; i++) assert(points[i].z == 0);
+#endif
+ *npoints = n;
return points;
}
@@ -187,44 +204,34 @@
LWPOINT*
lwmpoint_median(const LWMPOINT* g, double tol, uint32_t max_iter, char fail_if_not_converged)
{
- size_t npoints; /* we need to count this ourselves so we can exclude empties and weightless points */
- size_t i;
+ /* m ordinate is considered weight, if defined */
+ uint32_t npoints = 0; /* we need to count this ourselves so we can exclude empties and weightless points */
+ uint32_t i;
+ int input_empty = LW_TRUE;
double delta = DBL_MAX;
- double* distances;
- POINT4D* points = lwmpoint_extract_points_4d(g, &npoints); /* m ordinate is considered weight, if defined */
- POINT4D median;
+ POINT3D median;
+ double* distances = NULL;
+ POINT4D* points = lwmpoint_extract_points_4d(g, &npoints, &input_empty);
+ /* input validation failed, error reported already */
+ if (points == NULL) return NULL;
+
if (npoints == 0)
{
lwfree(points);
- if (fail_if_not_converged)
+ if (input_empty)
{
- lwerror("Median failed to find input points with weight.");
- return NULL;
+ return lwpoint_construct_empty(g->srid, 0, 0);
}
else
{
- return lwpoint_construct_empty(g->srid, 0, 0);
+ lwerror("Median failed to find non-empty input points with positive weight.");
+ return NULL;
}
}
median = init_guess(points, npoints);
- /* negative total weight means median cannot converge */
- if (median.m <= 0.0)
- {
- lwfree(points);
- if (fail_if_not_converged)
- {
- lwerror("Median cannot converge for input with negative sum of weights.");
- return NULL;
- }
- else
- {
- return lwpoint_construct_empty(g->srid, 0, 0);
- }
- }
-
distances = lwalloc(npoints * sizeof(double));
for (i = 0; i < max_iter && delta > tol; i++)
@@ -232,8 +239,8 @@
delta = iterate_4d(&median, points, npoints, distances);
}
+ lwfree(distances);
lwfree(points);
- lwfree(distances);
if (fail_if_not_converged && delta > tol)
{
More information about the postgis-tickets
mailing list