[postgis-tickets] r14746 - #3364, ST_GeometricMedian

Daniel Baston dbaston at gmail.com
Fri Mar 4 17:22:48 PST 2016


Author: dbaston
Date: 2016-03-04 17:22:48 -0800 (Fri, 04 Mar 2016)
New Revision: 14746

Modified:
   trunk/NEWS
   trunk/doc/html/image_src/Makefile.in
   trunk/doc/reference_measure.xml
   trunk/liblwgeom/Makefile.in
   trunk/liblwgeom/cunit/cu_algorithm.c
   trunk/liblwgeom/liblwgeom.h.in
   trunk/postgis/lwgeom_functions_analytic.c
   trunk/postgis/postgis.sql.in
   trunk/regress/Makefile.in
Log:
#3364, ST_GeometricMedian

Modified: trunk/NEWS
===================================================================
--- trunk/NEWS	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/NEWS	2016-03-05 01:22:48 UTC (rev 14746)
@@ -13,6 +13,7 @@
   - #2991 Enable ST_Transform to use PROJ.4 text (Mike Toews)
   - #3339 ST_GeneratePoints (Paul Ramsey)
   - #3362 ST_ClusterDBSCAN (Dan Baston) 
+  - #3364 ST_GeometricMedian (Dan Baston)
   - #3428 ST_Points (Dan Baston)
   - #3465 ST_ClusterKMeans (Paul Ramsey)
   - #3469 ST_MakeLine with MULTIPOINTs (Paul Norman)

Modified: trunk/doc/html/image_src/Makefile.in
===================================================================
--- trunk/doc/html/image_src/Makefile.in	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/doc/html/image_src/Makefile.in	2016-03-05 01:22:48 UTC (rev 14746)
@@ -69,6 +69,7 @@
 	../images/st_extrude03.png \
 	../images/st_generatepoints01.png \
 	../images/st_generatepoints02.png \
+	../images/st_geometricmedian01.png \
 	../images/st_issimple01.png \
 	../images/st_issimple02.png \
 	../images/st_issimple03.png \

Modified: trunk/doc/reference_measure.xml
===================================================================
--- trunk/doc/reference_measure.xml	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/doc/reference_measure.xml	2016-03-05 01:22:48 UTC (rev 14746)
@@ -2867,6 +2867,128 @@
 
 	</refentry>
 
+	<refentry id="ST_GeometricMedian">
+	  <refnamediv>
+		  <refname>
+			  ST_GeometricMedian
+		  </refname>
+
+		<refpurpose>
+			Returns the geometric median of a MultiPoint.
+		</refpurpose>
+	  </refnamediv>
+
+	<refsynopsisdiv>
+	  <funcsynopsis>
+		<funcprototype>
+			<funcdef>geometry
+				<function>
+					ST_GeometricMedian
+				</function>
+			</funcdef>
+
+			<paramdef>
+				<type>
+					geometry
+				</type>
+				<parameter>
+					g
+				</parameter>
+			</paramdef>
+
+			<paramdef>
+				<type>
+					float8
+				</type>
+				<parameter>
+					tolerance
+				</parameter>
+			</paramdef>
+
+			<paramdef>
+				<type>
+					int
+				</type>
+				<parameter>
+					max_iter
+				</parameter>
+			</paramdef>
+
+			<paramdef>
+				<type>
+					boolean
+				</type>
+				<parameter>
+					fail_if_not_converged
+				</parameter>
+			</paramdef>
+
+		</funcprototype>
+	  </funcsynopsis>
+	</refsynopsisdiv>
+
+	<refsection>
+	  <title>Description</title>
+
+	  <para>
+		  Computes the approximate geometric median of a MultiPoint geometry
+		  using the Weiszfeld algorithm.  The geometric median provides a 
+		  centrality measure that is less sensitive to outlier points than 
+		  the centroid.
+		
+		  The algorithm will iterate until the distance change between 
+		  successive iterations is less than the supplied <varname>tolerance</varname>
+		  parameter.  If this condition has not been met after <varname>max_iterations</varname>
+		  iterations, the function will produce an error and exit, unless <varname>fail_if_not_converged</varname>
+		  is set to false.
+
+		  If a tolerance value is not provided, a default tolerance value
+		  will be calculated based on the extent of the input geometry.
+	  </para>
+
+	  <para>Availability: 2.3.0</para>
+	  <para>&Z_support;</para>
+  </refsection>
+    <refsection>
+      <title>Examples</title>
+	  <para>
+		  <informalfigure>
+			  <mediaobject>
+					<imageobject>
+						<imagedata fileref="images/st_geometricmedian01.png" />
+					</imageobject>
+
+					<caption>
+						<para>
+						Comparison of the centroid (turquoise point) and geometric
+						median (red point) of a four-point MultiPoint (yellow points).
+						</para>
+					</caption>
+			  </mediaobject>
+		</informalfigure>
+	  </para>
+	  <programlisting>
+WITH test AS (
+SELECT 'MULTIPOINT((0 0), (1 1), (2 2), (200 200))'::geometry geom)
+SELECT
+  ST_AsText(ST_Centroid(geom)) centroid, 
+  ST_AsText(ST_GeometricMedian(geom)) median
+FROM test;
+      centroid      |                 median                 
+--------------------+----------------------------------------
+ POINT(50.75 50.75) | POINT(1.9761550281255 1.9761550281255)
+(1 row)
+	  </programlisting>
+	</refsection>
+	
+	<refsection>
+	  <title>See Also</title>
+
+	  <para><xref linkend="ST_Centroid"/></para>
+	</refsection>
+
+	</refentry>
+
 	<refentry id="ST_HasArc">
 	  <refnamediv>
 		<refname>ST_HasArc</refname>

Modified: trunk/liblwgeom/Makefile.in
===================================================================
--- trunk/liblwgeom/Makefile.in	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/liblwgeom/Makefile.in	2016-03-05 01:22:48 UTC (rev 14746)
@@ -79,6 +79,7 @@
 	lwin_wkb.o \
 	lwin_twkb.o \
 	lwiterator.o \
+	lwgeom_median.o \
 	lwout_wkt.o \
 	lwout_twkb.o \
 	lwin_wkt_parse.o \

Modified: trunk/liblwgeom/cunit/cu_algorithm.c
===================================================================
--- trunk/liblwgeom/cunit/cu_algorithm.c	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/liblwgeom/cunit/cu_algorithm.c	2016-03-05 01:22:48 UTC (rev 14746)
@@ -983,7 +983,86 @@
 }
 
 
+static void do_median_dims_check(char* wkt, int expected_dims)
+{
+	LWGEOM* g = lwgeom_from_wkt(wkt, LW_PARSER_CHECK_NONE);
+	LWPOINT* result = lwgeom_median(g, 1e-8, 100, LW_FALSE);
 
+	CU_ASSERT_EQUAL(expected_dims, lwgeom_ndims((LWGEOM*) result));
+
+	lwgeom_free(g);
+	lwpoint_free(result);
+}
+
+static void test_median_handles_3d_correctly(void)
+{
+	do_median_dims_check("MULTIPOINT ((1 3), (4 7), (2 9), (0 4), (2 2))", 2);
+	do_median_dims_check("MULTIPOINT Z ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 3);
+	do_median_dims_check("MULTIPOINT M ((1 3 4), (4 7 8), (2 9 1), (0 4 4), (2 2 3))", 2);
+	do_median_dims_check("MULTIPOINT ZM ((1 3 4 5), (4 7 8 6), (2 9 1 7), (0 4 4 8), (2 2 3 9))", 3);
+}
+
+static void do_median_test(char* input, char* expected)
+{
+	LWGEOM* g = lwgeom_from_wkt(input, LW_PARSER_CHECK_NONE);
+	LWPOINT* expected_result = lwgeom_as_lwpoint(lwgeom_from_wkt(expected, LW_PARSER_CHECK_NONE));
+	POINT3DZ actual_pt;
+	POINT3DZ expected_pt;
+
+	LWPOINT* result = lwgeom_median(g, FP_TOLERANCE / 10.0, 1000, LW_TRUE);
+	int passed = LW_TRUE;
+	
+	lwpoint_getPoint3dz_p(result, &actual_pt);
+	lwpoint_getPoint3dz_p(expected_result, &expected_pt);
+
+	passed = passed && lwgeom_is_empty((LWGEOM*) expected_result) == lwgeom_is_empty((LWGEOM*) result);
+	passed = passed && (lwgeom_has_z((LWGEOM*) expected_result) == lwgeom_has_z((LWGEOM*) result));
+
+	if (!lwgeom_is_empty((LWGEOM*) result))
+	{
+		passed = passed && FP_EQUALS(actual_pt.x, expected_pt.x);
+		passed = passed && FP_EQUALS(actual_pt.y, expected_pt.y);
+		passed = passed && (!lwgeom_has_z((LWGEOM*) expected_result) || FP_EQUALS(actual_pt.z, expected_pt.z));
+	}
+
+	if (!passed)
+		printf("median_test expected %s got %s\n", lwgeom_to_ewkt((LWGEOM*) expected_result), lwgeom_to_ewkt((LWGEOM*) result));
+
+	CU_ASSERT_TRUE(passed);
+
+	lwgeom_free(g);
+	lwpoint_free(expected_result);
+	lwpoint_free(result);
+}
+
+static void test_median_robustness(void)
+{
+	/* A simple implementation of Weiszfeld's algorithm will fail if the median is equal
+	 * to any one of the inputs, during any iteration of the algorithm.
+	 *
+	 * Because the algorithm uses the centroid as a starting point, this situation will
+	 * occur in the test case below.
+	 */
+	do_median_test("MULTIPOINT ((0 -1), (0 0), (0 1))", "POINT (0 0)");
+
+	/* Same as above but 3D, and shifter */
+	do_median_test("MULTIPOINT ((1 -1 3), (1 0 2), (2 1 1))", "POINT (1 0 2)");
+
+	/* Starting point is duplicated */
+	do_median_test("MULTIPOINT ((0 -1), (0 0), (0 0), (0 1))", "POINT (0 0)");
+
+	/* Cube */
+	do_median_test("MULTIPOINT ((10 10 10), (10 20 10), (20 10 10), (20 20 10), (10 10 20), (10 20 20), (20 10 20), (20 20 20))",
+				   "POINT (15 15 15)");
+
+	/* Some edge cases */
+	do_median_test("MULTIPOINT EMPTY", "POINT EMPTY");
+	do_median_test("MULTIPOINT (EMPTY)", "POINT EMPTY");
+	do_median_test("POINT (7 6)", "POINT (7 6)");
+	do_median_test("POINT (7 6 2)", "POINT (7 6 2)");
+	do_median_test("MULTIPOINT ((7 6 2), EMPTY)", "POINT (7 6 2)");
+}
+
 static void test_point_density(void)
 {
 	LWGEOM *geom;
@@ -1091,4 +1170,6 @@
 	PG_ADD_TEST(suite,test_lw_arc_center);
 	PG_ADD_TEST(suite,test_point_density);
 	PG_ADD_TEST(suite,test_kmeans);
+	PG_ADD_TEST(suite,test_median_handles_3d_correctly);
+	PG_ADD_TEST(suite,test_median_robustness);
 }

Modified: trunk/liblwgeom/liblwgeom.h.in
===================================================================
--- trunk/liblwgeom/liblwgeom.h.in	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/liblwgeom/liblwgeom.h.in	2016-03-05 01:22:48 UTC (rev 14746)
@@ -1448,6 +1448,12 @@
 extern LWMPOINT *lwmpoly_to_points(const LWMPOLY *mpoly, int npoints);
 extern LWMPOINT *lwgeom_to_points(const LWGEOM *lwgeom, int npoints);
 
+/*
+ * Geometric median
+ */
+extern LWPOINT* lwgeom_median(const LWGEOM *g, double tol, uint32_t maxiter, char fail_if_not_converged);
+extern LWPOINT* lwmpoint_median(const LWMPOINT *g, double tol, uint32_t maxiter, char fail_if_not_converged);
+
 /**
 * Calculate the GeoHash (http://geohash.org) string for a geometry. Caller must free.
 */

Modified: trunk/postgis/lwgeom_functions_analytic.c
===================================================================
--- trunk/postgis/lwgeom_functions_analytic.c	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/postgis/lwgeom_functions_analytic.c	2016-03-05 01:22:48 UTC (rev 14746)
@@ -53,6 +53,7 @@
 Datum LWGEOM_SetEffectiveArea(PG_FUNCTION_ARGS);
 Datum ST_LineCrossingDirection(PG_FUNCTION_ARGS);
 Datum ST_MinimumBoundingRadius(PG_FUNCTION_ARGS);
+Datum ST_GeometricMedian(PG_FUNCTION_ARGS);
 
 
 static double determineSide(const POINT2D *seg1, const POINT2D *seg2, const POINT2D *point);
@@ -1141,3 +1142,91 @@
 	PG_RETURN_DATUM(result);
 }
 
+/**********************************************************************
+ *
+ * ST_GeometricMedian
+ *
+ **********************************************************************/
+
+PG_FUNCTION_INFO_V1(ST_GeometricMedian);
+Datum ST_GeometricMedian(PG_FUNCTION_ARGS)
+{
+	GSERIALIZED* geom;
+	GSERIALIZED* result;
+	LWGEOM* input;
+	LWPOINT* lwresult;
+	double tolerance;
+	bool compute_tolerance_from_box;
+	bool fail_if_not_converged;
+	int max_iter;
+
+	/* Read and validate our input arguments */
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	compute_tolerance_from_box = PG_ARGISNULL(1);
+
+	if (!compute_tolerance_from_box)
+	{
+		tolerance = PG_GETARG_FLOAT8(1);
+		if (tolerance < 0)
+		{
+			lwpgerror("Tolerance must be positive.");
+			PG_RETURN_NULL();
+		}
+	}
+
+	max_iter = PG_ARGISNULL(2) ? -1 : PG_GETARG_INT32(2);
+	fail_if_not_converged = PG_ARGISNULL(3) ? LW_FALSE : PG_GETARG_BOOL(3);
+
+	if (max_iter < 0)
+	{
+		lwpgerror("Maximum iterations must be positive.");
+		PG_RETURN_NULL();
+	}
+
+	/* OK, inputs are valid. */
+	geom = PG_GETARG_GSERIALIZED_P(0);
+	input = lwgeom_from_gserialized(geom);
+
+	if (compute_tolerance_from_box)
+	{
+		/* Compute a default tolerance based on the smallest dimension
+		 * of the geometry's bounding box.
+		 */
+		static const double min_default_tolerance = 1e-8;
+		static const double tolerance_coefficient = 1e-6;
+		const GBOX* box = lwgeom_get_bbox(input);
+
+		if (!box)
+		{
+			tolerance = min_default_tolerance;
+		}
+		else
+		{
+			double min_dim = FP_MIN(box->xmax - box->xmin, box->ymax - box->ymin);
+			if (lwgeom_has_z(input))
+				min_dim = FP_MIN(min_dim, box->zmax - box->zmin);
+
+			/* Apply a lower bound to the computed default tolerance to
+			 * avoid a tolerance of zero in the case of collinear 
+			 * points.
+			 */
+			tolerance = FP_MAX(min_default_tolerance, tolerance_coefficient * min_dim);
+		}
+	}
+
+	lwresult = lwgeom_median(input, tolerance, max_iter, fail_if_not_converged);
+	lwgeom_free(input);
+
+	if(!lwresult)
+	{
+		lwpgerror("Error computing geometric median.");
+		PG_RETURN_NULL();
+	}
+
+	result = geometry_serialize(lwpoint_as_lwgeom(lwresult));
+	
+	PG_RETURN_POINTER(result);
+}
+

Modified: trunk/postgis/postgis.sql.in
===================================================================
--- trunk/postgis/postgis.sql.in	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/postgis/postgis.sql.in	2016-03-05 01:22:48 UTC (rev 14746)
@@ -4007,6 +4007,13 @@
 	AS 'MODULE_PATHNAME', 'centroid'
 	LANGUAGE 'c' IMMUTABLE STRICT;
 	
+
+-- Availability: 2.3.0
+CREATE OR REPLACE FUNCTION ST_GeometricMedian(g geometry, tolerance float8 DEFAULT NULL, max_iter int DEFAULT 10000, fail_if_not_converged boolean DEFAULT false)
+	RETURNS geometry
+	AS 'MODULE_PATHNAME', 'ST_GeometricMedian'
+	LANGUAGE 'c' IMMUTABLE;
+
 -- PostGIS equivalent function: IsRing(geometry)
 CREATE OR REPLACE FUNCTION ST_IsRing(geometry)
 	RETURNS boolean

Modified: trunk/regress/Makefile.in
===================================================================
--- trunk/regress/Makefile.in	2016-03-04 14:51:36 UTC (rev 14745)
+++ trunk/regress/Makefile.in	2016-03-05 01:22:48 UTC (rev 14746)
@@ -87,6 +87,7 @@
 	estimatedextent \
 	forcecurve \
 	geography \
+	geometric_median \
 	in_geohash \
 	in_gml \
 	in_kml \



More information about the postgis-tickets mailing list