[postgis-tickets] r16188 - Weight-aware ST_GeometricMedian
Darafei
komzpa at gmail.com
Tue Dec 26 06:07:49 PST 2017
Author: komzpa
Date: 2017-12-26 06:07:49 -0800 (Tue, 26 Dec 2017)
New Revision: 16188
Modified:
trunk/NEWS
trunk/doc/reference_measure.xml
trunk/liblwgeom/cunit/cu_algorithm.c
trunk/liblwgeom/lwgeom_median.c
Log:
Weight-aware ST_GeometricMedian
Weight is to be supplied as M ordinate of individual points in MultiPoint.
Bring lwgeom_median.c cunit test coverage to 100%.
ST_GeometricMedian(fail_if_not_converged=false, max_iter=0)
might give you weighted centroid, but that's not a promise.
Closes #3954
Closes https://github.com/postgis/postgis/pull/176
Modified: trunk/NEWS
===================================================================
--- trunk/NEWS 2017-12-25 22:05:39 UTC (rev 16187)
+++ trunk/NEWS 2017-12-26 14:07:49 UTC (rev 16188)
@@ -28,6 +28,7 @@
- #1014, Hashable geometry, allowing direct use in CTE signatures (Paul Ramsey)
- #3097, Really allow MULTILINESTRING blades in ST_Split() (Paul Ramsey)
- #3942, geojson: Do not include private header for json-c >= 0.13 (Björn Esser)
+ - #3954, ST_GeometricMedian now supports point weights (Darafei Praliaskouski)
PostGIS 2.4.0
Modified: trunk/doc/reference_measure.xml
===================================================================
--- trunk/doc/reference_measure.xml 2017-12-25 22:05:39 UTC (rev 16187)
+++ trunk/doc/reference_measure.xml 2017-12-26 14:07:49 UTC (rev 16188)
@@ -820,7 +820,7 @@
</refsection>
</refentry>
-
+
<refentry id="ST_Angle">
<refnamediv>
<refname>ST_Angle</refname>
@@ -835,7 +835,7 @@
<paramdef><type>geometry </type><parameter>point2</parameter></paramdef>
<paramdef><type>geometry </type><parameter>point3</parameter></paramdef>
<paramdef choice="opt"><type>geometry </type><parameter>point4</parameter></paramdef>
- </funcprototype>
+ </funcprototype>
<funcprototype>
<funcdef>float <function>ST_Angle</function></funcdef>
<paramdef><type>geometry </type><parameter>line1</parameter></paramdef>
@@ -850,12 +850,12 @@
If input are 2 lines, get first and last point of the lines as 4 points.
For 4 points,compute the angle measured clockwise of P1P2,P3P4.
Results are always positive, between 0 and 2*Pi radians.
-
+
Uses azimuth of pairs or points.
</para>
- <para>ST_Angle(P1,P2,P3) = ST_Angle(P2,P1,P2,P3)</para>
- <para>Result is in radian and can be converted to degrees using a built-in PostgreSQL function degrees(), as shown in the example.</para>
- <para>Availability: 2.5.0</para>
+ <para>ST_Angle(P1,P2,P3) = ST_Angle(P2,P1,P2,P3)</para>
+ <para>Result is in radian and can be converted to degrees using a built-in PostgreSQL function degrees(), as shown in the example.</para>
+ <para>Availability: 2.5.0</para>
</refsection>
<refsection>
@@ -867,24 +867,24 @@
, random() * 2 * PI() AS rad2
FROM generate_series(1,2,2) AS s
)
- , points AS (
+ , points AS (
SELECT s, rad1,rad2, ST_MakePoint(cos1+s,sin1+s) as p1, ST_MakePoint(s,s) AS p2, ST_MakePoint(cos2+s,sin2+s) as p3
- FROM rand
- ,cos(rad1) cos1, sin(rad1) sin1
+ FROM rand
+ ,cos(rad1) cos1, sin(rad1) sin1
,cos(rad2) cos2, sin(rad2) sin2
)
SELECT s, ST_AsText(ST_SnapToGrid(ST_MakeLine(ARRAY[p1,p2,p3]),0.001)) AS line
, degrees(ST_Angle(p1,p2,p3)) as computed_angle
- , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference
- , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference
+ , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference
+ , round(degrees(2*PI()-rad2 -2*PI()+rad1+2*PI()))::int%360 AS reference
FROM points ;
1 | line | computed_angle | reference
------------------+------------------
1 | LINESTRING(1.511 1.86,1 1,0.896 0.005) | 155.27033848688 | 155
-</programlisting>
- </refsection>
+</programlisting>
+ </refsection>
</refentry>
<refentry id="ST_Centroid">
@@ -3281,25 +3281,31 @@
<refsection>
<title>Description</title>
- <para>
+ <para>
Computes the approximate geometric median of a MultiPoint geometry
using the Weiszfeld algorithm. The geometric median provides a
centrality measure that is less sensitive to outlier points than
the centroid.
-
+ </para>
+ <para>
The algorithm will iterate until the distance change between
successive iterations is less than the supplied <varname>tolerance</varname>
parameter. If this condition has not been met after <varname>max_iterations</varname>
iterations, the function will produce an error and exit, unless <varname>fail_if_not_converged</varname>
is set to false.
-
- If a tolerance value is not provided, a default tolerance value
+ </para>
+ <para>
+ If a <varname>tolerance</varname> value is not provided, a default tolerance value
will be calculated based on the extent of the input geometry.
- </para>
-
- <para>Availability: 2.3.0</para>
- <para>&Z_support;</para>
- </refsection>
+ </para>
+ <para>
+ M value of points, if present, is interpreted as their relative weight.
+ </para>
+ <para>Availability: 2.3.0</para>
+ <para>Enhanced: 2.5.0 Added support for M as weight of points.</para>
+ <para>&Z_support;</para>
+ <para>&M_support;</para>
+ </refsection>
<refsection>
<title>Examples</title>
<para>
Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c 2017-12-25 22:05:39 UTC (rev 16187)
+++ trunk/liblwgeom/cunit/cu_algorithm.c 2017-12-26 14:07:49 UTC (rev 16188)
@@ -1113,32 +1113,56 @@
do_median_dims_check("MULTIPOINT ZM ((1 3 4 5), (4 7 8 6), (2 9 1 7), (0 4 4 8), (2 2 3 9))", 3);
}
-static void do_median_test(char* input, char* expected)
+static void do_median_test(char* input, char* expected, int fail_if_not_converged, int iter_count)
{
LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE);
- LWPOINT* expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE));
- POINT3DZ actual_pt;
- POINT3DZ expected_pt;
+ LWPOINT* expected_result = NULL;
+ POINT4D actual_pt;
+ POINT4D expected_pt;
- LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, 1000, LW_TRUE);
- int passed = LW_TRUE;
+ LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, iter_count, fail_if_not_converged);
+ int passed = LW_FALSE;
- lwpoint_getPoint3dz_p(result, &actual_pt);
- lwpoint_getPoint3dz_p(expected_result, &expected_pt);
+ if (expected != NULL)
+ {
+ expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE));
+ lwpoint_getPoint4d_p(expected_result, &expected_pt);
+ }
+ if (result != NULL)
+ {
+ lwpoint_getPoint4d_p(result, &actual_pt);
+ }
- passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result);
- passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result));
-
- if (!lwgeom_is_empty((LWGEOM*) result))
+ if (result != NULL && expected != NULL) /* got something, expecting something */
{
- passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x);
- passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y);
- passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z));
+ passed = LW_TRUE;
+ passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result);
+ passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result));
+ if (!lwgeom_is_empty((LWGEOM*) result))
+ {
+ passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x);
+ passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y);
+ passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z));
+ passed = passed && (!lwgeom_has_m((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.m, expected_pt.m));
+ }
+ if (!passed)
+ printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
}
+ else if (result == NULL && expected == NULL) /* got nothing, expecting nothing */
+ {
+ passed = LW_TRUE;
+ }
+ else if (result != NULL && expected == NULL) /* got something, expecting nothing */
+ {
+ passed = LW_FALSE;
+ printf("median_test expected NULL got %s\n", lwgeom_to_ewkt((LWGEOM*) result));
+ }
+ else if (result == NULL && expected != NULL) /* got nothing, expecting something */
+ {
+ passed = LW_FALSE;
+ printf("median_test expected %s got NULL\n", lwgeom_to_ewkt((LWGEOM*) expected_result));
+ }
- if (!passed)
- printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
-
CU_ASSERT_TRUE(passed);
lwgeom_free(g);
@@ -1154,24 +1178,48 @@
* Because the algorithm uses the centroid as a starting point, this situation will
* occur in the test case below.
*/
- do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)");
+ do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)", LW_TRUE, 1000);
/* Same as above but 3D, and shifter */
- do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)");
+ do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)", LW_TRUE, 1000);
/* Starting point is duplicated */
- do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)");
+ do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)", LW_TRUE, 1000);
/* Cube */
do_median_test("MULTIPOINT ((10 10 10), (10 20 10), (20 10 10), (20 20 10), (10 10 20), (10 20 20), (20 10 20), (20 20 20))",
- "POINT (15 15 15)");
+ "POINT (15 15 15)", LW_TRUE, 1000);
/* Some edge cases */
- do_median_test("MULTIPOINT EMPTY", "POINT EMPTY");
- do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY");
- do_median_test("POINT (7 6)", "POINT (7 6)");
- do_median_test("POINT (7 6 2)", "POINT (7 6 2)");
- do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)");
+ do_median_test("POINT (7 6)", "POINT (7 6)", LW_TRUE, 1000);
+ do_median_test("POINT (7 6 2)", "POINT (7 6 2)", LW_TRUE, 1000);
+ do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)", LW_TRUE, 1000);
+
+ /* Empty input */
+ do_median_test("MULTIPOINT EMPTY", "POINT EMPTY", LW_FALSE, 1000);
+ do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY", LW_FALSE, 1000);
+ do_median_test("MULTIPOINT EMPTY", NULL, LW_TRUE, 1000);
+ do_median_test("MULTIPOINT (EMPTY)", NULL, LW_TRUE, 1000);
+
+ /* Weighted input */
+ do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 1))", "POINT (1 0 2)", LW_TRUE, 1000);
+ /* Point that is replaced by two half-weighted */
+ do_median_test("MULTIPOINT ((0 -1 0 1), (0 0 0 1), (0 1 0 0.5), (0 1 0 0.5))", "POINT (0 0 0)", LW_TRUE, 1000);
+ /* Point is doubled and then erased by negative weight */
+ do_median_test("MULTIPOINT ((1 -1 3 1), (1 0 2 7), (2 1 1 2), (2 1 1 -1))", "POINT (1 0 2)", LW_TRUE, 1000);
+ /* Weightless input is empty */
+ do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", "POINT EMPTY", LW_FALSE, 1000);
+ do_median_test("MULTIPOINT ((0 -1 0 0), (0 0 0 0), (0 0 0 0), (0 1 0 0))", NULL, LW_TRUE, 1000);
+ /* Negative weight won't converge */
+ do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", "POINT EMPTY", LW_FALSE, 1000);
+ do_median_test("MULTIPOINT ((0 -1 0 -1), (0 0 0 -1), (0 1 0 -1))", NULL, LW_TRUE, 1000);
+
+ /* Bind convergence too tightly */
+ do_median_test("MULTIPOINT ((0 0), (1 1), (0 1), (2 2))", "POINT(0.75 1.0)", LW_FALSE, 0);
+ do_median_test("MULTIPOINT ((0 0), (1 1), (0 1), (2 2))", NULL, LW_TRUE, 1);
+ /* Unsupported geometry type */
+ do_median_test("POLYGON((1 0,0 1,1 2,2 1,1 0))", NULL, LW_TRUE, 1000);
+ do_median_test("POLYGON((1 0,0 1,1 2,2 1,1 0))", NULL, LW_FALSE, 1000);
}
static void test_point_density(void)
Modified: trunk/liblwgeom/lwgeom_median.c
===================================================================
--- trunk/liblwgeom/lwgeom_median.c 2017-12-25 22:05:39 UTC (rev 16187)
+++ trunk/liblwgeom/lwgeom_median.c 2017-12-26 14:07:49 UTC (rev 16188)
@@ -19,6 +19,7 @@
**********************************************************************
*
* Copyright 2015 Daniel Baston <dbaston at gmail.com>
+ * Copyright 2017 Darafei Praliaskouski <me at komzpa.net>
*
**********************************************************************/
@@ -28,43 +29,51 @@
#include "lwgeom_log.h"
static void
-calc_distances_3d(const POINT3D* curr, const POINT3D* points, uint32_t npoints, double* distances)
+calc_weighted_distances_3d(const POINT3D* curr, const POINT4D* points, size_t npoints, double* distances)
{
- uint32_t i;
+ size_t i;
for (i = 0; i < npoints; i++)
{
- distances[i] = distance3d_pt_pt(curr, &points[i]);
+ distances[i] = distance3d_pt_pt(curr, (POINT3D*)&points[i]) / points[i].m;
}
}
static double
-iterate_3d(POINT3D* curr, const POINT3D* points, uint32_t npoints, double* distances)
+iterate_4d(POINT4D* curr, const POINT4D* points, size_t npoints, double* distances)
{
- uint32_t i;
- POINT3D next = { 0, 0, 0 };
+ size_t i;
+ POINT4D next = { 0, 0, 0, 1};
double delta;
double denom = 0;
char hit = LW_FALSE;
- calc_distances_3d(curr, points, npoints, distances);
+ calc_weighted_distances_3d((POINT3D*)curr, points, npoints, distances);
for (i = 0; i < npoints; i++)
{
- if (distances[i] == 0)
- hit = LW_TRUE;
+ /* we need to use lower epsilon than in FP_IS_ZERO in the loop for calculation to converge */
+ if (fabs(distances[i]) > DBL_EPSILON)
+ {
+ next.x += points[i].x / distances[i];
+ next.y += points[i].y / distances[i];
+ next.z += points[i].z / distances[i];
+ denom += 1.0 / distances[i];
+ }
else
- denom += 1.0 / distances[i];
- }
-
- for (i = 0; i < npoints; i++)
- {
- if (distances[i] > 0)
{
- next.x += (points[i].x / distances[i]) / denom;
- next.y += (points[i].y / distances[i]) / denom;
- next.z += (points[i].z / distances[i]) / denom;
+ hit = LW_TRUE;
}
}
+ if (fabs(denom) > DBL_EPSILON)
+ {
+ next.x /= denom;
+ next.y /= denom;
+ next.z /= denom;
+ }
+ else
+ {
+ hit = LW_TRUE;
+ }
/* If any of the intermediate points in the calculation is found in the
* set of input points, the standard Weiszfeld method gets stuck with a
@@ -86,10 +95,9 @@
double dy = 0;
double dz = 0;
double r_inv;
- POINT3D alt;
for (i = 0; i < npoints; i++)
{
- if (distances[i] > 0)
+ if (fabs(distances[i]) > DBL_EPSILON)
{
dx += (points[i].x - curr->x) / distances[i];
dy += (points[i].y - curr->y) / distances[i];
@@ -99,14 +107,12 @@
r_inv = 1.0 / sqrt ( dx*dx + dy*dy + dz*dz );
- alt.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x;
- alt.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y;
- alt.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z;
-
- next = alt;
+ next.x = FP_MAX(0, 1.0 - r_inv)*next.x + FP_MIN(1.0, r_inv)*curr->x;
+ next.y = FP_MAX(0, 1.0 - r_inv)*next.y + FP_MIN(1.0, r_inv)*curr->y;
+ next.z = FP_MAX(0, 1.0 - r_inv)*next.z + FP_MIN(1.0, r_inv)*curr->z;
}
- delta = distance3d_pt_pt(curr, &next);
+ delta = distance3d_pt_pt((POINT3D*)curr, (POINT3D*)&next);
curr->x = next.x;
curr->y = next.y;
@@ -115,37 +121,59 @@
return delta;
}
-static POINT3D
-init_guess(const POINT3D* points, uint32_t npoints)
+static POINT4D
+init_guess(const POINT4D* points, size_t npoints)
{
- POINT3D guess = { 0, 0, 0 };
- uint32_t i;
+ POINT4D guess = { 0, 0, 0, 0 };
+ size_t i;
for (i = 0; i < npoints; i++)
{
- guess.x += points[i].x / npoints;
- guess.y += points[i].y / npoints;
- guess.z += points[i].z / npoints;
+ guess.x += points[i].x * points[i].m;
+ guess.y += points[i].y * points[i].m;
+ guess.z += points[i].z * points[i].m;
+ guess.m += points[i].m;
}
-
+ if (!FP_IS_ZERO(guess.m))
+ {
+ guess.x /= guess.m;
+ guess.y /= guess.m;
+ guess.z /= guess.m;
+ }
return guess;
}
-static POINT3D*
-lwmpoint_extract_points_3d(const LWMPOINT* g, uint32_t* ngeoms)
+static POINT4D*
+lwmpoint_extract_points_4d(const LWMPOINT* g, size_t* ngeoms)
{
- uint32_t i;
- uint32_t n = 0;
+ size_t i;
+ size_t n = 0;
int is_3d = lwgeom_has_z((LWGEOM*) g);
+ int has_m = lwgeom_has_m((LWGEOM*) g);
- POINT3D* points = lwalloc(g->ngeoms * sizeof(POINT3D));
+ POINT4D* points = lwalloc(g->ngeoms * sizeof(POINT4D));
for (i = 0; i < g->ngeoms; i++)
{
LWGEOM* subg = lwcollection_getsubgeom((LWCOLLECTION*) g, i);
if (!lwgeom_is_empty(subg))
{
- getPoint3dz_p(((LWPOINT*) subg)->point, 0, (POINT3DZ*) &points[n++]);
+ getPoint4d_p(((LWPOINT*) subg)->point, 0, (POINT4D*) &points[n]);
if (!is_3d)
- points[n-1].z = 0.0; /* in case the getPoint functions return NaN in the future for 2d */
+ {
+ points[n].z = 0.0; /* in case the getPoint functions return NaN in the future for 2d */
+ }
+ if (!has_m)
+ {
+ points[n].m = 1.0;
+ n++;
+ }
+ else
+ {
+ /* points with zero weight are not going to affect calculation, drop them early */
+ if (!FP_IS_ZERO(points[n].m))
+ {
+ n++;
+ }
+ }
}
}
@@ -155,29 +183,53 @@
return points;
}
+
LWPOINT*
lwmpoint_median(const LWMPOINT* g, double tol, uint32_t max_iter, char fail_if_not_converged)
{
- uint32_t npoints; /* we need to count this ourselves so we can exclude empties */
- uint32_t i;
+ size_t npoints; /* we need to count this ourselves so we can exclude empties and weightless points */
+ size_t i;
double delta = DBL_MAX;
double* distances;
- POINT3D* points = lwmpoint_extract_points_3d(g, &npoints);
- POINT3D median;
+ POINT4D* points = lwmpoint_extract_points_4d(g, &npoints); /* m ordinate is considered weight, if defined */
+ POINT4D median;
if (npoints == 0)
{
lwfree(points);
- return lwpoint_construct_empty(g->srid, 0, 0);
+ if (fail_if_not_converged)
+ {
+ lwerror("Median failed to find input points with weight.");
+ return NULL;
+ }
+ else
+ {
+ return lwpoint_construct_empty(g->srid, 0, 0);
+ }
}
median = init_guess(points, npoints);
+ /* negative total weight means median cannot converge */
+ if (median.m <= 0.0)
+ {
+ lwfree(points);
+ if (fail_if_not_converged)
+ {
+ lwerror("Median cannot converge for input with negative sum of weights.");
+ return NULL;
+ }
+ else
+ {
+ return lwpoint_construct_empty(g->srid, 0, 0);
+ }
+ }
+
distances = lwalloc(npoints * sizeof(double));
for (i = 0; i < max_iter && delta > tol; i++)
{
- delta = iterate_3d(&median, points, npoints, distances);
+ delta = iterate_4d(&median, points, npoints, distances);
}
lwfree(points);
More information about the postgis-tickets
mailing list