[postgis-devel] KNN Box Op <#>

Paragon Corporation lr at pcorp.us
Wed Sep 28 02:44:33 PDT 2011


> -----Original Message-----
> From: postgis-devel-bounces at postgis.refractions.net 
> [mailto:postgis-devel-bounces at postgis.refractions.net] On 
> Behalf Of Paul Ramsey
> Sent: Tuesday, September 27, 2011 5:16 PM
> To: PostGIS Development Discussion
> Subject: [postgis-devel] KNN Box Op <#>
> 
> OK, I have a box op, <#>, but OsGeo SVN is hosed right now, 
> so for keeners I attach a patch while I go off to take care 
> of kiddies for a few hours.
> 
> P.
> 
-- How does the index stuff kick in.  I assume we still need to do
ST_DWithin etc. checks but can just make that balloon bigger? 
I didn't see any apparent use in the plan saying anything about pulling from
index etc?

Okay took a quick stab at comparing these.  Not a big test so somewhat
anectdotal.

For street index (line / line), the box check seems more accurate.  For
street, point again box is more accurate over centroid.
If I acutally look at the sorting order -- the box distance much more
closely mirrors that of the actual distance for line/line line/point
(of street length type features)

Haven't played much with the polygons.  For largish polygons as expected
both are bad, but the box distance seems much worse than the box centroid at
lease for this very very small sampling.  It rpobably varies a lot by
percent area taken up by the geometry in its box.


Anyrate Here is a small sampling (consisting of 25,000 roads).

--compare ranking of proximity based on real distance, box distance,
centroid box distance
-- line / line
SELECT b.tlid, ROW_NUMBER() OVER(ORDER BY ST_Distance(b.geom, d.geom),
b.tlid) As r_dist, 
    ROW_NUMBER() OVER( ORDER BY b.geom <#> d.geom, b.tlid) As r_box,
    ROW_NUMBER() OVER( ORDER BY b.geom <-> d.geom, b.tlid) As r_cbox
    FROM bos_roads As b 
    CROSS JOIN (SELECT geom FROM bos_roads WHERE tlid = 85732029) As d 
ORDER BY ST_Distance(b.geom, d.geom), b.tlid LIMIT 10;

   tlid    | r_dist | r_box | r_cbox
-----------+--------+-------+--------
  85732027 |      1 |     1 |      5
  85732029 |      2 |     2 |      1
  85732031 |      3 |     3 |      4
  85734335 |      4 |     4 |      2
  85736037 |      5 |     5 |      7
 624683742 |      6 |     6 |      3
  85719343 |      7 |     9 |      9
  85741826 |      8 |     7 |      8
  85732032 |      9 |    10 |     13
  85735592 |     10 |     8 |      6

--compare ranking of proximity based on real distance, box distance,
centroid box distance
-- line / point
SELECT b.tlid, ROW_NUMBER() OVER(ORDER BY ST_Distance(b.geom, d.geom),
b.tlid) As r_dist, 
    ROW_NUMBER() OVER( ORDER BY b.geom <#> d.geom, b.tlid) As r_box,
    ROW_NUMBER() OVER( ORDER BY b.geom <-> d.geom, b.tlid) As r_cbox
    FROM bos_roads As b 
    CROSS JOIN (SELECT ST_STartPoint(geom) As geom FROM bos_roads WHERE tlid
= 85732029) As d 
ORDER BY ST_Distance(b.geom, d.geom), b.tlid LIMIT 10;

   tlid    | r_dist | r_box | r_cbox
-----------+--------+-------+--------
  85732027 |      1 |     1 |      1
  85732029 |      2 |     2 |      3
  85736037 |      3 |     3 |      2
 624683742 |      4 |     4 |     15
  85719343 |      5 |     6 |      4
  85741826 |      6 |     5 |      5
  85727279 |      7 |     7 |      7
  85727286 |      8 |     8 |     11
  85742522 |      9 |     9 |      6
  85734378 |     10 |    10 |      9


-- line / line box distance -- 98 out of 100 caught
WITH d AS
    (SELECT geom FROM bos_roads WHERE tlid = 85732029) 
SELECT count(*)
FROM (SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d
ORDER BY b.geom <#> d.geom,b.tlid LIMIT 100) As foo
WHERE foo.tlid IN(SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d 
ORDER BY ST_Distance(b.geom, d.geom),b.tlid LIMIT 100);

-- line / line centroid box distance -- 96 out of 100 caught
WITH d AS
    (SELECT geom FROM bos_roads WHERE tlid = 85732029) 
SELECT count(*)
FROM (SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d
ORDER BY b.geom <-> d.geom,b.tlid LIMIT 100) As foo
WHERE foo.tlid IN(SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d 
ORDER BY ST_Distance(b.geom, d.geom),b.tlid LIMIT 100);


-- line / point box distance -- 99 out of 100 caught
WITH d AS
    (SELECT ST_StartPoint(geom) As geom FROM bos_roads WHERE tlid =
85732029) 
SELECT count(*)
FROM (SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d
ORDER BY b.geom <#> d.geom,b.tlid LIMIT 100) As foo
WHERE foo.tlid IN(SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d 
ORDER BY ST_Distance(b.geom, d.geom),b.tlid LIMIT 100);


-- line / point centroid box distance -- 95 out of 100 caught
WITH d AS
    (SELECT ST_StartPoint(geom) As geom FROM bos_roads WHERE tlid =
85732029) 
SELECT count(*)
FROM (SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d
ORDER BY b.geom <-> d.geom,b.tlid LIMIT 100) As foo
WHERE foo.tlid IN(SELECT b.tlid
FROM bos_roads As b 
    CROSS JOIN d 
ORDER BY ST_Distance(b.geom, d.geom),b.tlid LIMIT 100);


-- line / polygon box distance -- 8 out of 100 caught
WITH d AS
    (SELECT geom FROM nei WHERE nei_name = 'Dorchester') ,
-- real matches
    mat As (SELECT b.tlid, ST_Distance(b.geom, d.geom) As dist
FROM bos_roads As b 
     INNER JOIN d ON ( NOT ST_Intersects(b.geom, d.geom) AND
ST_DWithin(b.geom,d.geom,1000) )
     ORDER BY  ST_Distance(b.geom, d.geom), b.tlid LIMIT 100
     )
-- approximates 8	1.48770742881592,	86.5991984830209,
89.7872195676031
SELECT count(*), min(dist), max(dist), (SELECT max(dist) fROM mat) As
mat_max
FROM (SELECT b.tlid, ST_Distance(b.geom, d.geom) As dist
FROM bos_roads As b 
    INNER JOIN d ON (NOT ST_Intersects(b.geom, d.geom) AND
ST_DWithin(b.geom,d.geom,1000) )
ORDER BY b.geom <#> d.geom, b.tlid LIMIT 100) As foo
WHERE foo.tlid IN( SELECT tlid FROM mat);


-- line / polygon box centroid distance -- 28 out of 100 caught
WITH d AS
    (SELECT geom FROM nei WHERE nei_name = 'Dorchester') ,
-- real matches
    mat As (SELECT b.tlid, ST_Distance(b.geom, d.geom) As dist
FROM bos_roads As b 
     INNER JOIN d ON ( NOT ST_Intersects(b.geom, d.geom) AND
ST_DWithin(b.geom,d.geom,1000) )
     ORDER BY  ST_Distance(b.geom, d.geom), b.tlid LIMIT 100
     )
-- approximates (28, 0.826181843053216, 84.4177305750625)
SELECT count(*), min(dist), max(dist)
FROM (SELECT b.tlid, ST_Distance(b.geom, d.geom) As dist
FROM bos_roads As b 
    INNER JOIN d ON (NOT ST_Intersects(b.geom, d.geom) AND
ST_DWithin(b.geom,d.geom,1000) )
ORDER BY b.geom <-> d.geom, b.tlid LIMIT 100) As foo
WHERE foo.tlid IN( SELECT tlid FROM mat);








More information about the postgis-devel mailing list