[postgis-users] Determining clusters of points

Kevin Neufeld kneufeld.ca at gmail.com
Tue Dec 7 21:20:38 PST 2010


On 12/7/2010 5:15 PM, pcreso at pcreso.com wrote:
> You might look at the newish (v8.4+ ?) RECURSIVE/WITH capabilities of 
> Postgres.
>
> Given Postgis allows you to create arbitrary buffer zones around your 
> points, the RECURSIVE query capability allows you to write an SQL that 
> will recursively track points which can interact with each other

So, just playing around a bit, I came up with this RECURSIVE query 
statement that will find all points in a particular "cluster".  Every 
time I run the UPDATE statement, another cluster of points is identified 
and updated to have the same group id.  I have no idea how this would 
perform over a table of a million points ...


CREATE TABLE pts (id serial, grp int, geom geometry);
CREATE SEQUENCE pts_grp_seq; -- to label cluster groups

-- Sample data similiar to Sébastien's original post
INSERT INTO pts (geom)
SELECT (ST_Dump(
'MULTIPOINT (
     6 129,
     21 200,
     30 66,
     33 109,
     41 137,
     48 191,
     76 119,
     90 197,
     175 212,
     178 230,
     182 53,
     196 209,
     259 124,
     259 148
)')).geom;

-- Simple wrapper around an UPDATE statement that identifies a
-- point cluster everytime its run
CREATE OR REPLACE FUNCTION cluster_pts() RETURNS text AS
$body$
DECLARE
    results int;
BEGIN
    LOOP
       -- Assign all records in the same cluster, the same group id
       UPDATE pts
       SET grp = f.grp
       FROM (
          WITH RECURSIVE t(id, geom, grp, path, cycle) AS (
              -- Grab the first record that doesn't have a group id
             (SELECT id, geom, nextval('pts_grp_seq'), ARRAY[id], false
              FROM pts
              WHERE grp IS NULL
              LIMIT 1
              )
            UNION ALL
              -- Find all the other records that are iteratively within some
              -- distance to the starting point.
              -- This includes A to B to C paths, but not A to B to A 
cyclic paths
              SELECT a.id, a.geom, b.grp, path || ARRAY[a.id], a.id = 
ANY(path)
              FROM pts a, t b
              WHERE a.id != b.id
              AND ST_DWithin(a.geom, b.geom, 80)
              AND NOT cycle
          )
          SELECT DISTINCT id, grp FROM t
       ) AS f
       WHERE pts.id = f.id;

       -- repeat until no more updates are performed.
       GET DIAGNOSTICS results = ROW_COUNT;
       EXIT WHEN results = 0;
    END LOOP;

    RETURN 'done.';
END;
$body$ LANGUAGE plpgsql;

SELECT cluster_pts();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: pts.png
Type: image/png
Size: 2336 bytes
Desc: not available
URL: <http://lists.osgeo.org/pipermail/postgis-users/attachments/20101207/a649baf4/attachment.png>


More information about the postgis-users mailing list