[postgis-devel] Lame attempt at cascade union in sql

Obe, Regina robe.dnd at cityofboston.gov
Fri Aug 8 06:09:56 PDT 2008


Paul et. al,

I'm beating Kevin sometimes now.  But I guess I have no hope of beating
Martin.
I think its something wrong with my approximation of the difficulty of
gluing geometries the larger they get.

Anyrate from 1000 - 5000 geometries I appear to be consistently beating
Kevin's query with this dataset.  After around 8,000 he starts passing
me and doing the whole set still seems the same as before - stuck at
around 4 minutes vs. Kevin's 1.25 minutes.

Martin, - can you test the below on your box and tell me how it
performs.

Below is the revised union aggregate and bench marks.

Thanks,
Regina

CREATE OR REPLACE FUNCTION st_unitecascade_garray(geometry[], iter int,
startcount int)
  RETURNS geometry AS
$$
SELECT CASE WHEN array_upper($1,1) < 5 OR  array_upper($1,1) BETWEEN 0
AND ln($3 + 1)/ln($2 + 1) THEN st_unite_garray($1) ELSE
	st_unitecascade_garray(ARRAY(SELECT st_unite_garray($1[i:least(i
+ CAST(ln($3 + 1)/ln($2 + 1) As int) - 1,array_upper($1,1))]) As geom
FROM generate_series(1, array_upper($1,1), CAST(ln($3 + 1)/ln($2 + 1) As
int)) As i), $2 + 1, $3) END

$$
  LANGUAGE 'sql' IMMUTABLE;

CREATE OR REPLACE FUNCTION st_unitecascade_garray_sort(geometry[])
  RETURNS geometry AS
$$
	SELECT CASE WHEN array_upper($1,1) < 51  THEN
st_unite_garray($1) ELSE
		st_unitecascade_garray(ARRAY(SELECT ($1)[s] FROM
generate_series(1, array_upper($1, 1)) AS s 
			ORDER BY ST_SnapToGrid(ST_Centroid(($1)[s]),0.1)
) , 1, array_upper($1,1)) END;
$$
  LANGUAGE 'sql' IMMUTABLE;


  
--DROP AGGREGATE st_cascadeunion(geometry);

CREATE AGGREGATE st_cascadeunion(geometry) (
  SFUNC=st_geom_accum,
  STYPE=geometry[],
  FINALFUNC=st_unitecascade_garray_sort
);

--BEGIN TESTS --
CREATE INDEX idx_sample_poly_the_geom
  ON sample_poly
  USING gist
  (the_geom);

  
ALTER TABLE sample_poly CLUSTER ON idx_sample_poly_the_geom;
CLUSTER sample_poly;


vacuum analyze sample_poly;




--2409 ms, 2438 ms, 2422 ms )
SELECT ST_CascadeUnion(the_geom)
FROM (SELECT * FROM sample_poly limit 2000) As foo;


--2578 ms, 2563 ms, 2657 ms
SELECT ST_Union(the_geom) AS the_geom
FROM (
   SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
   FROM (
     SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
     FROM (
       SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
       FROM (
         SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
         FROM (SELECT * FROM sample_poly limit 2000) As foo
         GROUP BY round(id/10)
         ORDER BY id) AS tmp1
       GROUP BY round(id/100)
       ORDER BY id) AS tmp2
     GROUP BY round(id/1000)
     ORDER BY id) AS tmp3
   GROUP BY round(id/10000)
   ORDER BY id) AS tmp4
GROUP BY round(id/100000);

-- 7594 ms, 7672 ms, 7718 ms
SELECT ST_CascadeUnion(the_geom)
FROM (SELECT * FROM sample_poly limit 5000) As foo;

--8438 ms, 8343 ms, 8328 ms
SELECT ST_Union(the_geom) AS the_geom
FROM (
   SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
   FROM (
     SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
     FROM (
       SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
       FROM (
         SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
         FROM (SELECT * FROM sample_poly limit 5000) As foo
         GROUP BY round(id/10)
         ORDER BY id) AS tmp1
       GROUP BY round(id/100)
       ORDER BY id) AS tmp2
     GROUP BY round(id/1000)
     ORDER BY id) AS tmp3
   GROUP BY round(id/10000)
   ORDER BY id) AS tmp4
GROUP BY round(id/100000);

-- 17,921 ms, 18016 ms = .29 min
SELECT ST_CascadeUnion(the_geom)
FROM (SELECT * FROM sample_poly limit 8000) As foo;

-- 15,296 ms 15172 ms = .25 min
SELECT ST_Union(the_geom) AS the_geom
FROM (
   SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
   FROM (
     SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
     FROM (
       SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
       FROM (
         SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
         FROM (SELECT * FROM sample_poly limit 8000) As foo
         GROUP BY round(id/10)
         ORDER BY id) AS tmp1
       GROUP BY round(id/100)
       ORDER BY id) AS tmp2
     GROUP BY round(id/1000)
     ORDER BY id) AS tmp3
   GROUP BY round(id/10000)
   ORDER BY id) AS tmp4
GROUP BY round(id/100000);

-- 245,497 ms = 4.09 min
SELECT ST_CascadeUnion(the_geom)
FROM (SELECT * FROM sample_poly) As foo;

-- 77,078 ms, 75124 ms = 1.26 min
SELECT ST_Union(the_geom) AS the_geom
FROM (
   SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
   FROM (
     SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
     FROM (
       SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
       FROM (
         SELECT min(id) AS id, ST_Union(the_geom) AS the_geom
         FROM (SELECT * FROM sample_poly) As foo
         GROUP BY round(id/10)
         ORDER BY id) AS tmp1
       GROUP BY round(id/100)
       ORDER BY id) AS tmp2
     GROUP BY round(id/1000)
     ORDER BY id) AS tmp3
   GROUP BY round(id/10000)
   ORDER BY id) AS tmp4
GROUP BY round(id/100000);
  

-----Original Message-----
From: postgis-devel-bounces at postgis.refractions.net
[mailto:postgis-devel-bounces at postgis.refractions.net] On Behalf Of Paul
Ramsey
Sent: Thursday, August 07, 2008 3:55 PM
To: PostGIS Development Discussion
Subject: Re: [postgis-devel] Lame attempt at cascade union in sql

Gods,

Your last test is a close approximation to the true cascaded union,
and that's 2 orders of magnitude faster than the naive approach.

P.

-----------------------------------------
The substance of this message, including any attachments, may be
confidential, legally privileged and/or exempt from disclosure
pursuant to Massachusetts law. It is intended
solely for the addressee. If you received this in error, please
contact the sender and delete the material from any computer.




More information about the postgis-devel mailing list