[postgis-devel] [PostGIS] #1382: Some addresses take a long time to geocode or normalize

PostGIS trac at osgeo.org
Sat Dec 17 19:35:51 PST 2011


#1382: Some addresses take a long time to geocode or normalize
----------------------------+-----------------------------------------------
 Reporter:  raviada         |       Owner:  robe         
     Type:  defect          |      Status:  assigned     
 Priority:  medium          |   Milestone:  PostGIS 2.0.0
Component:  tiger geocoder  |     Version:  trunk        
 Keywords:                  |  
----------------------------+-----------------------------------------------

Comment(by raviada):

 select geocode_address(normalize_address('2734 21ST ST, ASTORIA, NY
 111023653'),1)

 "("(2734,E,21st,St,,,""New
 York"",NY,11235,t)",0101000020AD100000097F608BA97C52C06D6463B6F24A4440,13)"
 "("(2734,,21st,St,,,""Niagara
 Falls"",NY,14305,t)",0101000020AD100000625E1B7D62C253C06AD712AC798E4540,17)"
 "("(,,21st,St,,,Troy,NY,12180,t)",0101000020AD100000AA30B0E8D16A52C09CD542B2EC5E4540,18)"



 NOTICE:  Bad zip newzip range: '{}'
 NOTICE:  Ignore new zip range that is bad too: '{}'
 NOTICE:  Zip range based on only considering city: '{}'
 NOTICE:  stmt: WITH a AS
         ( SELECT *
                 FROM (SELECT f.*, ad.side, ad.zip, ad.fromhn, ad.tohn,
                                         RANK() OVER(ORDER BY
 diff_zip(ad.zip,'111023653') +  CASE WHEN lower(f.name) = lower('21ST')
 THEN 0 ELSE levenshtein_ignore_case(f.name, lower('21ST') )  END +
 levenshtein_ignore_case(f.fullname, lower('21ST' || ' ' ||
 COALESCE('St','')) )
                                                 + CASE WHEN
 (greatest_hn(ad.fromhn,ad.tohn) % 2)::integer = ('2734' % 2)::integer THEN
 0 ELSE 1 END
                                                 + CASE WHEN
 '2734'::integer BETWEEN least_hn(ad.fromhn,ad.tohn) AND
 greatest_hn(ad.fromhn, ad.tohn)
                                                         THEN 0 ELSE 4 END
                                                         + CASE WHEN
 lower('St') = lower(f.suftypabrv) OR lower('St') = lower(f.pretypabrv)
 THEN 0 ELSE 1 END
                                                         +
 rate_attributes(NULL, f.predirabrv,    '21ST',  f.name , 'St',
 suftypabrv , NULL,    sufdirabrv, prequalabr)
                                                         )
                                                 As rank
                                 FROM featnames As f INNER JOIN addr As ad
 ON (f.tlid = ad.tlid)
                     WHERE '36' = f.statefp AND '36' = ad.statefp
                          AND lower(f.name) = lower('21ST')     AND (
 ad.zip = ANY('{}'::varchar[]) )   ) AS foo ORDER BY rank LIMIT 3 )
         SELECT * FROM (
     SELECT DISTINCT ON (sub.predirabrv,sub.fename,COALESCE(sub.suftypabrv,
 sub.pretypabrv) ,sub.sufdirabrv,sub.place,s.stusps,sub.zip)
 sub.predirabrv   as fedirp,    sub.fename,    COALESCE(sub.suftypabrv,
 sub.pretypabrv)   as fetype,    sub.sufdirabrv   as fedirs,    sub.place ,
 s.stusps as state,    sub.zip as zip,    interpolate_from_address('2734',
 sub.fromhn,        sub.tohn, sub.the_geom, sub.side) as address_geom,
 sub.sub_rating +   least(coalesce(diff_zip('111023653' , sub.zip),0),
 10)::integer   + coalesce(levenshtein_ignore_case('ASTORIA', sub.place),5)
 as sub_rating,    sub.exact_address as exact_address, sub.tohn, sub.fromhn
 FROM (  SELECT tlid, predirabrv, COALESCE(b.prequalabr || ' ','' ) ||
 b.name As fename, suftypabrv, sufdirabrv, fromhn, tohn,
                     side,  zip, rate_attributes(NULL, predirabrv,
 '21ST',  b.name , 'St',    suftypabrv , NULL,    sufdirabrv, prequalabr) +
 CASE         WHEN '2734'::integer IS NULL OR b.fromhn IS NULL THEN 20
 WHEN '2734'::integer >= least_hn(b.fromhn, b.tohn)             AND
 '2734'::integer <= greatest_hn(b.fromhn,b.tohn)            AND
 ('2734'::integer % 2) = (to_number(b.fromhn,'99999999') % 2)::integer
 THEN 0        WHEN '2734'::integer >= least_hn(b.fromhn,b.tohn)
 AND '2734'::integer <= greatest_hn(b.fromhn,b.tohn)            THEN 2
 ELSE            ((1.0 -
 (least_hn('2734'::text,least_hn(b.fromhn,b.tohn)::text)::numeric /
 (greatest(1,greatest_hn('2734'::text,greatest_hn(b.fromhn,b.tohn)::text)))
 )) * 5)::integer + 5        END    as sub_rating,'2734'::integer >=
 least_hn(b.fromhn,b.tohn)             AND '2734'::integer <=
 greatest_hn(b.fromhn,b.tohn)             AND ('2734' %
 2)::numeric::integer = (to_number(b.fromhn,'99999999') % 2)    as
 exact_address, b.name, b.prequalabr, b.pretypabrv, b.tfidr, b.tfidl,
 b.the_geom, b.place   FROM
              (SELECT   a.tlid, a.fullname, a.name, a.predirabrv,
 a.suftypabrv, a.sufdirabrv, a.prequalabr, a.pretypabrv,
                 b.the_geom, tfidr, tfidl,
                 a.side ,
                 a.fromhn,
                 a.tohn,
                 a.zip,
                 p.name as place

                 FROM  a INNER JOIN edges As b ON (a.statefp = b.statefp
 AND a.tlid = b.tlid  )
                     INNER JOIN faces AS f ON ('36' = f.statefp AND (
 (b.tfidl = f.tfid AND a.side = 'L') OR (b.tfidr = f.tfid AND a.side = 'R'
 ) ))
                     INNER JOIN place p ON ('36' = p.statefp AND f.placefp
 = p.placefp )
                 WHERE a.statefp = '36'  AND  b.statefp = '36'

           )   As b
            ORDER BY 10 ,  11 DESC
            LIMIT 20
             ) AS sub
           JOIN state s ON ('36' = s.statefp)
             ORDER BY 1,2,3,4,5,6,7,9
           LIMIT 20) As foo ORDER BY sub_rating, exact_address DESC LIMIT
 1
 NOTICE:  SELECT DISTINCT ON
 (sub.predirabrv,sub.fename,COALESCE(sub.suftypabrv, sub.pretypabrv)
 ,sub.sufdirabrv,coalesce(p.name,zip.city,cs.name,co.name),s.stusps,sub.zip)
 sub.predirabrv   as fedirp,    sub.fename,    COALESCE(sub.suftypabrv,
 sub.pretypabrv)   as fetype,    sub.sufdirabrv   as fedirs,
 coalesce(p.name,zip.city,cs.name,co.name)::varchar as place,    s.stusps
 as state,    sub.zip as zip,    interpolate_from_address($1, sub.fromhn,
 sub.tohn, e.the_geom, sub.side) as address_geom,       sub.sub_rating +
 least((coalesce(diff_zip($7 , sub.zip),0) *1.00/2)::integer,
 coalesce(levenshtein_ignore_case($7, sub.zip),0) )  +
 coalesce(least(levenshtein_ignore_case($3,
 coalesce(p.name,zip.city,cs.name,co.name)), levenshtein_ignore_case($3,
 coalesce(cs.name,co.name))),5)    as sub_rating,    sub.exact_address as
 exact_address  FROM (  SELECT a.tlid, predirabrv, COALESCE(a.prequalabr ||
 ' ','' ) || a.name As fename, suftypabrv, sufdirabrv, fromhn, tohn,
                     side, a.statefp, zip, rate_attributes($5,
 a.predirabrv,    $2,  a.name , $4,    a.suftypabrv , $6,    a.sufdirabrv,
 a.prequalabr) +     CASE         WHEN $1::integer IS NULL OR b.fromhn IS
 NULL THEN 20        WHEN $1::integer >= least_hn(b.fromhn, b.tohn)
 AND $1::integer <= greatest_hn(b.fromhn,b.tohn)            AND
 ($1::integer % 2) = (to_number(b.fromhn,'99999999') % 2)::integer
 THEN 0        WHEN $1::integer >= least_hn(b.fromhn,b.tohn)            AND
 $1::integer <= greatest_hn(b.fromhn,b.tohn)            THEN 2        ELSE
 ((1.0 - (least_hn($1::text,least_hn(b.fromhn,b.tohn)::text)::numeric /
 greatest(1,greatest_hn($1::text,greatest_hn(b.fromhn,b.tohn)::text)))) *
 5)::integer + 5        END    as sub_rating,$1::integer >=
 least_hn(b.fromhn,b.tohn)             AND $1::integer <=
 greatest_hn(b.fromhn,b.tohn)             AND ($1 % 2)::numeric::integer =
 (to_number(b.fromhn,'99999999') % 2)    as exact_address, a.name,
 a.prequalabr, a.pretypabrv   FROM featnames a join addr b ON (a.tlid =
 b.tlid AND a.statefp = b.statefp  )  WHERE        a.statefp = '36' AND
 a.mtfcc LIKE 'S%'      AND ( soundex($2) = soundex(a.name)  OR (
 (length($2) > 15 or (length($2) > 7 AND a.prequalabr > '') ) AND
 lower(a.fullname) LIKE lower(substring($2,1,15)) || '%' ) OR
 numeric_streets_equal($2, a.name) )   ORDER BY 11  LIMIT 20    ) AS sub
 JOIN edges e ON ('36' = e.statefp AND sub.tlid = e.tlid AND e.mtfcc LIKE
 'S%' )   JOIN state s ON ('36' = s.statefp)  JOIN faces f ON ('36' =
 f.statefp AND (e.tfidl = f.tfid OR e.tfidr = f.tfid))  LEFT JOIN
 zip_lookup_base zip ON (sub.zip = zip.zip AND zip.statefp='36')  LEFT JOIN
 place p ON ('36' = p.statefp AND f.placefp = p.placefp)  LEFT JOIN county
 co ON ('36' = co.statefp AND f.countyfp = co.countyfp)  LEFT JOIN cousub
 cs ON ('36' = cs.statefp AND cs.cosbidfp = sub.statefp || co.countyfp ||
 f.cousubfp) WHERE  ( (sub.side = 'L' and e.tfidl = f.tfid) OR (sub.side =
 'R' and e.tfidr = f.tfid) )  ORDER BY 1,2,3,4,5,6,7,9 LIMIT 10
 NOTICE:  PREPARE query_base_geo(integer,
 varchar,varchar,varchar,varchar,varchar,varchar,geometry) As SELECT
 DISTINCT ON (sub.predirabrv,sub.fename,COALESCE(sub.suftypabrv,
 sub.pretypabrv)
 ,sub.sufdirabrv,coalesce(p.name,zip.city,cs.name,co.name),s.stusps,sub.zip)
 sub.predirabrv   as fedirp,    sub.fename,    COALESCE(sub.suftypabrv,
 sub.pretypabrv)   as fetype,    sub.sufdirabrv   as fedirs,
 coalesce(p.name,zip.city,cs.name,co.name)::varchar as place,    s.stusps
 as state,    sub.zip as zip,    interpolate_from_address($1, sub.fromhn,
 sub.tohn, e.the_geom, sub.side) as address_geom,       sub.sub_rating +
 least((coalesce(diff_zip($7 , sub.zip),0) *1.00/2)::integer,
 coalesce(levenshtein_ignore_case($7, sub.zip),0) )  +
 coalesce(least(levenshtein_ignore_case($3,
 coalesce(p.name,zip.city,cs.name,co.name)), levenshtein_ignore_case($3,
 coalesce(cs.name,co.name))),5)    as sub_rating,    sub.exact_address as
 exact_address  FROM (  SELECT a.tlid, predirabrv, COALESCE(a.prequalabr ||
 ' ','' ) || a.name As fename, suftypabrv, sufdirabrv, fromhn, tohn,
                     side, a.statefp, zip, rate_attributes($5,
 a.predirabrv,    $2,  a.name , $4,    a.suftypabrv , $6,    a.sufdirabrv,
 a.prequalabr) +     CASE         WHEN $1::integer IS NULL OR b.fromhn IS
 NULL THEN 20        WHEN $1::integer >= least_hn(b.fromhn, b.tohn)
 AND $1::integer <= greatest_hn(b.fromhn,b.tohn)            AND
 ($1::integer % 2) = (to_number(b.fromhn,'99999999') % 2)::integer
 THEN 0        WHEN $1::integer >= least_hn(b.fromhn,b.tohn)            AND
 $1::integer <= greatest_hn(b.fromhn,b.tohn)            THEN 2        ELSE
 ((1.0 - (least_hn($1::text,least_hn(b.fromhn,b.tohn)::text)::numeric /
 greatest(1,greatest_hn($1::text,greatest_hn(b.fromhn,b.tohn)::text)))) *
 5)::integer + 5        END    as sub_rating,$1::integer >=
 least_hn(b.fromhn,b.tohn)             AND $1::integer <=
 greatest_hn(b.fromhn,b.tohn)             AND ($1 % 2)::numeric::integer =
 (to_number(b.fromhn,'99999999') % 2)    as exact_address, a.name,
 a.prequalabr, a.pretypabrv   FROM featnames a join addr b ON (a.tlid =
 b.tlid AND a.statefp = b.statefp  )  WHERE        a.statefp = '36' AND
 a.mtfcc LIKE 'S%'      AND ( soundex($2) = soundex(a.name)  OR (
 (length($2) > 15 or (length($2) > 7 AND a.prequalabr > '') ) AND
 lower(a.fullname) LIKE lower(substring($2,1,15)) || '%' ) OR
 numeric_streets_equal($2, a.name) )   ORDER BY 11  LIMIT 20    ) AS sub
 JOIN edges e ON ('36' = e.statefp AND sub.tlid = e.tlid AND e.mtfcc LIKE
 'S%' )   JOIN state s ON ('36' = s.statefp)  JOIN faces f ON ('36' =
 f.statefp AND (e.tfidl = f.tfid OR e.tfidr = f.tfid))  LEFT JOIN
 zip_lookup_base zip ON (sub.zip = zip.zip AND zip.statefp='36')  LEFT JOIN
 place p ON ('36' = p.statefp AND f.placefp = p.placefp)  LEFT JOIN county
 co ON ('36' = co.statefp AND f.countyfp = co.countyfp)  LEFT JOIN cousub
 cs ON ('36' = cs.statefp AND cs.cosbidfp = sub.statefp || co.countyfp ||
 f.cousubfp) WHERE  ( (sub.side = 'L' and e.tfidl = f.tfid) OR (sub.side =
 'R' and e.tfidr = f.tfid) )  ORDER BY 1,2,3,4,5,6,7,9 LIMIT 10
 NOTICE:  EXECUTE
 query_base_geo(2734,'21ST','ASTORIA','St',NULL,NULL,'111023653',NULL);
 NOTICE:  DEALLOCATE query_base_geo;
 Total query runtime: 14172 ms.
 3 rows retrieved.

-- 
Ticket URL: <http://trac.osgeo.org/postgis/ticket/1382#comment:7>
PostGIS <http://trac.osgeo.org/postgis/>
The PostGIS Trac is used for bug, enhancement & task tracking, a user and developer wiki, and a view into the subversion code repository of PostGIS project.


More information about the postgis-devel mailing list