[postgis-users] Determining clusters of points
Kevin Neufeld
kneufeld.ca at gmail.com
Tue Dec 7 21:20:38 PST 2010
On 12/7/2010 5:15 PM, pcreso at pcreso.com wrote:
> You might look at the newish (v8.4+ ?) RECURSIVE/WITH capabilities of
> Postgres.
>
> Given Postgis allows you to create arbitrary buffer zones around your
> points, the RECURSIVE query capability allows you to write an SQL that
> will recursively track points which can interact with each other
So, just playing around a bit, I came up with this RECURSIVE query
statement that will find all points in a particular "cluster". Every
time I run the UPDATE statement, another cluster of points is identified
and updated to have the same group id. I have no idea how this would
perform over a table of a million points ...
CREATE TABLE pts (id serial, grp int, geom geometry);
CREATE SEQUENCE pts_grp_seq; -- to label cluster groups
-- Sample data similiar to Sébastien's original post
INSERT INTO pts (geom)
SELECT (ST_Dump(
'MULTIPOINT (
6 129,
21 200,
30 66,
33 109,
41 137,
48 191,
76 119,
90 197,
175 212,
178 230,
182 53,
196 209,
259 124,
259 148
)')).geom;
-- Simple wrapper around an UPDATE statement that identifies a
-- point cluster everytime its run
CREATE OR REPLACE FUNCTION cluster_pts() RETURNS text AS
$body$
DECLARE
results int;
BEGIN
LOOP
-- Assign all records in the same cluster, the same group id
UPDATE pts
SET grp = f.grp
FROM (
WITH RECURSIVE t(id, geom, grp, path, cycle) AS (
-- Grab the first record that doesn't have a group id
(SELECT id, geom, nextval('pts_grp_seq'), ARRAY[id], false
FROM pts
WHERE grp IS NULL
LIMIT 1
)
UNION ALL
-- Find all the other records that are iteratively within some
-- distance to the starting point.
-- This includes A to B to C paths, but not A to B to A
cyclic paths
SELECT a.id, a.geom, b.grp, path || ARRAY[a.id], a.id =
ANY(path)
FROM pts a, t b
WHERE a.id != b.id
AND ST_DWithin(a.geom, b.geom, 80)
AND NOT cycle
)
SELECT DISTINCT id, grp FROM t
) AS f
WHERE pts.id = f.id;
-- repeat until no more updates are performed.
GET DIAGNOSTICS results = ROW_COUNT;
EXIT WHEN results = 0;
END LOOP;
RETURN 'done.';
END;
$body$ LANGUAGE plpgsql;
SELECT cluster_pts();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: pts.png
Type: image/png
Size: 2336 bytes
Desc: not available
URL: <http://lists.osgeo.org/pipermail/postgis-users/attachments/20101207/a649baf4/attachment.png>
More information about the postgis-users
mailing list