[postgis-tickets] r14746 - #3364, ST_GeometricMedian
Daniel Baston
dbaston at gmail.com
Fri Mar 4 17:22:48 PST 2016
Author: dbaston
Date: 2016-03-04 17:22:48 -0800 (Fri, 04 Mar 2016)
New Revision: 14746
Modified:
trunk/NEWS
trunk/doc/html/image_src/Makefile.in
trunk/doc/reference_measure.xml
trunk/liblwgeom/Makefile.in
trunk/liblwgeom/cunit/cu_algorithm.c
trunk/liblwgeom/liblwgeom.h.in
trunk/postgis/lwgeom_functions_analytic.c
trunk/postgis/postgis.sql.in
trunk/regress/Makefile.in
Log:
#3364, ST_GeometricMedian
Modified: trunk/NEWS
===================================================================
--- trunk/NEWS 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/NEWS 2016-03-05 01:22:48 UTC (rev 14746)
@@ -13,6 +13,7 @@
- #2991 Enable ST_Transform to use PROJ.4 text (Mike Toews)
- #3339 ST_GeneratePoints (Paul Ramsey)
- #3362 ST_ClusterDBSCAN (Dan Baston)
+ - #3364 ST_GeometricMedian (Dan Baston)
- #3428 ST_Points (Dan Baston)
- #3465 ST_ClusterKMeans (Paul Ramsey)
- #3469 ST_MakeLine with MULTIPOINTs (Paul Norman)
Modified: trunk/doc/html/image_src/Makefile.in
===================================================================
--- trunk/doc/html/image_src/Makefile.in 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/doc/html/image_src/Makefile.in 2016-03-05 01:22:48 UTC (rev 14746)
@@ -69,6 +69,7 @@
../images/st_extrude03.png \
../images/st_generatepoints01.png \
../images/st_generatepoints02.png \
+ ../images/st_geometricmedian01.png \
../images/st_issimple01.png \
../images/st_issimple02.png \
../images/st_issimple03.png \
Modified: trunk/doc/reference_measure.xml
===================================================================
--- trunk/doc/reference_measure.xml 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/doc/reference_measure.xml 2016-03-05 01:22:48 UTC (rev 14746)
@@ -2867,6 +2867,128 @@
</refentry>
+ <refentry id="ST_GeometricMedian">
+ <refnamediv>
+ <refname>
+ ST_GeometricMedian
+ </refname>
+
+ <refpurpose>
+ Returns the geometric median of a MultiPoint.
+ </refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>geometry
+ <function>
+ ST_GeometricMedian
+ </function>
+ </funcdef>
+
+ <paramdef>
+ <type>
+ geometry
+ </type>
+ <parameter>
+ g
+ </parameter>
+ </paramdef>
+
+ <paramdef>
+ <type>
+ float8
+ </type>
+ <parameter>
+ tolerance
+ </parameter>
+ </paramdef>
+
+ <paramdef>
+ <type>
+ int
+ </type>
+ <parameter>
+ max_iter
+ </parameter>
+ </paramdef>
+
+ <paramdef>
+ <type>
+ boolean
+ </type>
+ <parameter>
+ fail_if_not_converged
+ </parameter>
+ </paramdef>
+
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsection>
+ <title>Description</title>
+
+ <para>
+ Computes the approximate geometric median of a MultiPoint geometry
+ using the Weiszfeld algorithm. The geometric median provides a
+ centrality measure that is less sensitive to outlier points than
+ the centroid.
+
+ The algorithm will iterate until the distance change between
+ successive iterations is less than the supplied <varname>tolerance</varname>
+ parameter. If this condition has not been met after <varname>max_iterations</varname>
+ iterations, the function will produce an error and exit, unless <varname>fail_if_not_converged</varname>
+ is set to false.
+
+ If a tolerance value is not provided, a default tolerance value
+ will be calculated based on the extent of the input geometry.
+ </para>
+
+ <para>Availability: 2.3.0</para>
+ <para>&Z_support;</para>
+ </refsection>
+ <refsection>
+ <title>Examples</title>
+ <para>
+ <informalfigure>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="images/st_geometricmedian01.png" />
+ </imageobject>
+
+ <caption>
+ <para>
+ Comparison of the centroid (turquoise point) and geometric
+ median (red point) of a four-point MultiPoint (yellow points).
+ </para>
+ </caption>
+ </mediaobject>
+ </informalfigure>
+ </para>
+ <programlisting>
+WITH test AS (
+SELECT 'MULTIPOINT((0 0), (1 1), (2 2), (200 200))'::geometry geom)
+SELECT
+ ST_AsText(ST_Centroid(geom)) centroid,
+ ST_AsText(ST_GeometricMedian(geom)) median
+FROM test;
+ centroid | median
+--------------------+----------------------------------------
+ POINT(50.75 50.75) | POINT(1.9761550281255 1.9761550281255)
+(1 row)
+ </programlisting>
+ </refsection>
+
+ <refsection>
+ <title>See Also</title>
+
+ <para><xref linkend="ST_Centroid"/></para>
+ </refsection>
+
+ </refentry>
+
<refentry id="ST_HasArc">
<refnamediv>
<refname>ST_HasArc</refname>
Modified: trunk/liblwgeom/Makefile.in
===================================================================
--- trunk/liblwgeom/Makefile.in 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/liblwgeom/Makefile.in 2016-03-05 01:22:48 UTC (rev 14746)
@@ -79,6 +79,7 @@
lwin_wkb.o \
lwin_twkb.o \
lwiterator.o \
+ lwgeom_median.o \
lwout_wkt.o \
lwout_twkb.o \
lwin_wkt_parse.o \
Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/liblwgeom/cunit/cu_algorithm.c 2016-03-05 01:22:48 UTC (rev 14746)
@@ -983,7 +983,86 @@
}
+static void do_median_dims_check(char* wkt, int expected_dims)
+{
+ LWGEOM* g = lwgeom_from_wkt(wkt, LW_PARSER_CHECK_NONE);
+ LWPOINT* result = lwgeom_median(g, 1e-8, 100, LW_FALSE);
+ CU_ASSERT_EQUAL(expected_dims, lwgeom_ndims((LWGEOM*) result));
+
+ lwgeom_free(g);
+ lwpoint_free(result);
+}
+
+static void test_median_handles_3d_correctly(void)
+{
+ do_median_dims_check("MULTIPOINT ((1 3), (4 7), (2 9), (0 4), (2 2))", 2);
+ do_median_dims_check("MULTIPOINT Z ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 3);
+ do_median_dims_check("MULTIPOINT M ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 2);
+ do_median_dims_check("MULTIPOINT ZM ((1 3 4 5), (4 7 8 6), (2 9 1 7), (0 4 4 8), (2 2 3 9))", 3);
+}
+
+static void do_median_test(char* input, char* expected)
+{
+ LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE);
+ LWPOINT* expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE));
+ POINT3DZ actual_pt;
+ POINT3DZ expected_pt;
+
+ LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, 1000, LW_TRUE);
+ int passed = LW_TRUE;
+
+ lwpoint_getPoint3dz_p(result, &actual_pt);
+ lwpoint_getPoint3dz_p(expected_result, &expected_pt);
+
+ passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result);
+ passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result));
+
+ if (!lwgeom_is_empty((LWGEOM*) result))
+ {
+ passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x);
+ passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y);
+ passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z));
+ }
+
+ if (!passed)
+ printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
+
+ CU_ASSERT_TRUE(passed);
+
+ lwgeom_free(g);
+ lwpoint_free(expected_result);
+ lwpoint_free(result);
+}
+
+static void test_median_robustness(void)
+{
+ /* A simple implementation of Weiszfeld's algorithm will fail if the median is equal
+ * to any one of the inputs, during any iteration of the algorithm.
+ *
+ * Because the algorithm uses the centroid as a starting point, this situation will
+ * occur in the test case below.
+ */
+ do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)");
+
+ /* Same as above but 3D, and shifter */
+ do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)");
+
+ /* Starting point is duplicated */
+ do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)");
+
+ /* Cube */
+ do_median_test("MULTIPOINT ((10 10 10), (10 20 10), (20 10 10), (20 20 10), (10 10 20), (10 20 20), (20 10 20), (20 20 20))",
+ "POINT (15 15 15)");
+
+ /* Some edge cases */
+ do_median_test("MULTIPOINT EMPTY", "POINT EMPTY");
+ do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY");
+ do_median_test("POINT (7 6)", "POINT (7 6)");
+ do_median_test("POINT (7 6 2)", "POINT (7 6 2)");
+ do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)");
+}
+
static void test_point_density(void)
{
LWGEOM *geom;
@@ -1091,4 +1170,6 @@
PG_ADD_TEST(suite,test_lw_arc_center);
PG_ADD_TEST(suite,test_point_density);
PG_ADD_TEST(suite,test_kmeans);
+ PG_ADD_TEST(suite,test_median_handles_3d_correctly);
+ PG_ADD_TEST(suite,test_median_robustness);
}
Modified: trunk/liblwgeom/liblwgeom.h.in
===================================================================
--- trunk/liblwgeom/liblwgeom.h.in 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/liblwgeom/liblwgeom.h.in 2016-03-05 01:22:48 UTC (rev 14746)
@@ -1448,6 +1448,12 @@
extern LWMPOINT *lwmpoly_to_points(const LWMPOLY *mpoly, int npoints);
extern LWMPOINT *lwgeom_to_points(const LWGEOM *lwgeom, int npoints);
+/*
+ * Geometric median
+ */
+extern LWPOINT* lwgeom_median(const LWGEOM *g, double tol, uint32_t maxiter, char fail_if_not_converged);
+extern LWPOINT* lwmpoint_median(const LWMPOINT *g, double tol, uint32_t maxiter, char fail_if_not_converged);
+
/**
* Calculate the GeoHash (http://geohash.org) string for a geometry. Caller must free.
*/
Modified: trunk/postgis/lwgeom_functions_analytic.c
===================================================================
--- trunk/postgis/lwgeom_functions_analytic.c 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/postgis/lwgeom_functions_analytic.c 2016-03-05 01:22:48 UTC (rev 14746)
@@ -53,6 +53,7 @@
Datum LWGEOM_SetEffectiveArea(PG_FUNCTION_ARGS);
Datum ST_LineCrossingDirection(PG_FUNCTION_ARGS);
Datum ST_MinimumBoundingRadius(PG_FUNCTION_ARGS);
+Datum ST_GeometricMedian(PG_FUNCTION_ARGS);
static double determineSide(const POINT2D *seg1, const POINT2D *seg2, const POINT2D *point);
@@ -1141,3 +1142,91 @@
PG_RETURN_DATUM(result);
}
+/**********************************************************************
+ *
+ * ST_GeometricMedian
+ *
+ **********************************************************************/
+
+PG_FUNCTION_INFO_V1(ST_GeometricMedian);
+Datum ST_GeometricMedian(PG_FUNCTION_ARGS)
+{
+ GSERIALIZED* geom;
+ GSERIALIZED* result;
+ LWGEOM* input;
+ LWPOINT* lwresult;
+ double tolerance;
+ bool compute_tolerance_from_box;
+ bool fail_if_not_converged;
+ int max_iter;
+
+ /* Read and validate our input arguments */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ compute_tolerance_from_box = PG_ARGISNULL(1);
+
+ if (!compute_tolerance_from_box)
+ {
+ tolerance = PG_GETARG_FLOAT8(1);
+ if (tolerance < 0)
+ {
+ lwpgerror("Tolerance must be positive.");
+ PG_RETURN_NULL();
+ }
+ }
+
+ max_iter = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
+ fail_if_not_converged = PG_ARGISNULL(3) ? LW_FALSE : PG_GETARG_BOOL(3);
+
+ if (max_iter < 0)
+ {
+ lwpgerror("Maximum iterations must be positive.");
+ PG_RETURN_NULL();
+ }
+
+ /* OK, inputs are valid. */
+ geom = PG_GETARG_GSERIALIZED_P(0);
+ input = lwgeom_from_gserialized(geom);
+
+ if (compute_tolerance_from_box)
+ {
+ /* Compute a default tolerance based on the smallest dimension
+ * of the geometry's bounding box.
+ */
+ static const double min_default_tolerance = 1e-8;
+ static const double tolerance_coefficient = 1e-6;
+ const GBOX* box = lwgeom_get_bbox(input);
+
+ if (!box)
+ {
+ tolerance = min_default_tolerance;
+ }
+ else
+ {
+ double min_dim = FP_MIN(box->xmax - box->xmin, box->ymax - box->ymin);
+ if (lwgeom_has_z(input))
+ min_dim = FP_MIN(min_dim, box->zmax - box->zmin);
+
+ /* Apply a lower bound to the computed default tolerance to
+ * avoid a tolerance of zero in the case of collinear
+ * points.
+ */
+ tolerance = FP_MAX(min_default_tolerance, tolerance_coefficient * min_dim);
+ }
+ }
+
+ lwresult = lwgeom_median(input, tolerance, max_iter, fail_if_not_converged);
+ lwgeom_free(input);
+
+ if(!lwresult)
+ {
+ lwpgerror("Error computing geometric median.");
+ PG_RETURN_NULL();
+ }
+
+ result = geometry_serialize(lwpoint_as_lwgeom(lwresult));
+
+ PG_RETURN_POINTER(result);
+}
+
Modified: trunk/postgis/postgis.sql.in
===================================================================
--- trunk/postgis/postgis.sql.in 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/postgis/postgis.sql.in 2016-03-05 01:22:48 UTC (rev 14746)
@@ -4007,6 +4007,13 @@
AS 'MODULE_PATHNAME', 'centroid'
LANGUAGE 'c' IMMUTABLE STRICT;
+
+-- Availability: 2.3.0
+CREATE OR REPLACE FUNCTION ST_GeometricMedian(g geometry, tolerance float8 DEFAULT NULL, max_iter int DEFAULT 10000, fail_if_not_converged boolean DEFAULT false)
+ RETURNS geometry
+ AS 'MODULE_PATHNAME', 'ST_GeometricMedian'
+ LANGUAGE 'c' IMMUTABLE;
+
-- PostGIS equivalent function: IsRing(geometry)
CREATE OR REPLACE FUNCTION ST_IsRing(geometry)
RETURNS boolean
Modified: trunk/regress/Makefile.in
===================================================================
--- trunk/regress/Makefile.in 2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/regress/Makefile.in 2016-03-05 01:22:48 UTC (rev 14746)
@@ -87,6 +87,7 @@
estimatedextent \
forcecurve \
geography \
+ geometric_median \
in_geohash \
in_gml \
in_kml \
More information about the postgis-tickets
mailing list