[postgis-devel] postgis_tiger geocoding extension : France & temporal geocoding

Rémi Cura remi.cura at gmail.com
Thu Jun 16 03:10:19 PDT 2016


Hey dear list,
I'm currently considering extending postgis geocoder.

I have two needs:
 - to work with French data
 - to perform temporal geocoding (you give an addresses *and *a date, you
get geographic position).

For the moment I'm trying to understand how the geocoder works with USA
data.

I imported Massachusetts for a test, and everything seems okay except that
my tiger.state table is empty, which prevents geocoding.

Notwithstanding the import step, and the adress normaliser, it seems to
boil down to a large query in the function tiger.geocode_address().


I would be delighted to have some pointers on the design of the
postgis_tiger_geocoder extension.
In perticular, I don't understand the adress standardizer business at all
(yeah, I found some doc, no, I don't understand it well)


Many thanks for help,
Cheers,
Rémi-C






-------------------------------------
WITH a AS
(
         SELECT   *
         FROM     (
                             SELECT     f.*,
                                        ad.side,
                                        ad.zip,
                                        ad.fromhn,
                                        ad.tohn,
                                        Rank() over(ORDER BY
Diff_zip(ad.zip,'01104')*2.00 +
                                        CASE
                                                   WHEN Lower(f.name) =
Lower('FRANKLIN') THEN 0
                                                   ELSE
Levenshtein_ignore_case(f.name, Lower('FRANKLIN') )
                                        END +
Levenshtein_ignore_case(f.fullname, Lower('FRANKLIN'
                                                   || ' '
                                                   || Coalesce('ST','')) ) +
                                        CASE
                                                   WHEN (

Greatest_hn(ad.fromhn,ad.tohn) % 2)::INTEGER = ('1' % 2)::INTEGER THEN 0
                                                   ELSE 1
                                        END +
                                        CASE
                                                   WHEN '1'::INTEGER
BETWEEN Least_hn(ad.fromhn,ad.tohn) AND        Greatest_hn(ad.fromhn,
ad.tohn) THEN 0
                                                   ELSE 4
                                        END +
                                        CASE
                                                   WHEN Lower('ST') =
Lower(f.suftypabrv)
                                                   OR         Lower('ST') =
Lower(f.pretypabrv) THEN 0
                                                   ELSE 1
                                        END + Rate_attributes(NULL,
f.predirabrv, 'FRANKLIN', f.name , 'ST', suftypabrv , NULL, sufdirabrv,
prequalabr) ) AS rank
                             FROM
tiger.featnames
AS f
                             inner join
tiger.addr
AS ad
                             ON         (
                                                   f.tlid = ad.tlid)
                             WHERE      '25' = f.statefp
                             AND        '25' = ad.statefp
                             AND        (
                                                   Lower(f.fullname) LIKE
(Coalesce(NULL
                                                              || ' ','')
                                                              ||
Lower('FRANKLIN')
                                                              || '%')::text
                                        OR         Lower(f.name) =
Lower('FRANKLIN')
                                        OR         Soundex(f.name) =
Soundex('FRANKLIN') )
                             AND        (
                                                   ad.zip =
ANY('{01102,01103,01104,01105,01106}'::VARCHAR[]) ) ) AS foo
         ORDER BY rank limit 30 )

, b AS (
                  SELECT     a.tlid,
                     a.fullname,
                     a.name,
                     a.predirabrv,
                     a.suftypabrv,
                     a.sufdirabrv,
                     a.prequalabr,
                     a.pretypabrv,
                     b.the_geom,
                     tfidr,
                     tfidl,
                     a.side ,
                     a.fromhn,
                     a.tohn,
                     a.zip,
                     p.name AS place
                  FROM       a
                  inner join tiger.edges AS b
                  ON         (
                            a.statefp = b.statefp
                     AND        a.tlid = b.tlid )
                  inner join tiger.faces AS f
                  ON         (
                            '25' = f.statefp
                     AND        ( (
                                      b.tfidl = f.tfid
                                   AND        a.side = 'L')
                            OR         (
                                      b.tfidr = f.tfid
                                   AND        a.side = 'R' ) ))
                  inner join tiger.place p
                  ON         (
                            '25' = p.statefp
                     AND        f.placefp = p.placefp )
                  WHERE      a.statefp = '25'
                  AND        b.statefp = '25' )

, sub AS  (
    SELECT   tlid,
           predirabrv,
           coalesce(b.prequalabr
                || ' ','' )
                || b.name AS fename,
           suftypabrv,
           sufdirabrv,
           fromhn,
           tohn,
           side,
           zip,
           rate_attributes(NULL, predirabrv, 'FRANKLIN', b.name , 'ST',
suftypabrv , NULL, sufdirabrv, prequalabr) +
           CASE
                WHEN '1'::INTEGER IS NULL
                OR       b.fromhn IS NULL THEN 20
                WHEN '1'::    INTEGER >= least_hn(b.fromhn, b.tohn)
                AND      '1'::INTEGER <= greatest_hn(b.fromhn,b.tohn)
                AND      (
                          '1'::INTEGER % 2) =
(to_number(b.fromhn,'99999999') % 2)::INTEGER THEN 0
                WHEN '1'::    INTEGER >= least_hn(b.fromhn,b.tohn)
                AND      '1'::INTEGER <= greatest_hn(b.fromhn,b.tohn) THEN 2
                ELSE ((1.0 -
(least_hn('1'::text,least_hn(b.fromhn,b.tohn)::text)::NUMERIC /
(greatest(1,greatest_hn('1'::text,greatest_hn(b.fromhn,b.tohn)::text))) ))
* 5)::INTEGER + 5
           END::INTEGER AS sub_rating,
           '1'::INTEGER >= least_hn(b.fromhn,b.tohn)
      AND      '1'::INTEGER <= greatest_hn(b.fromhn,b.tohn)
      AND      (
                '1' % 2)::NUMERIC::INTEGER =
(to_number(b.fromhn,'99999999') % 2) AS exact_address,
           b.name,
           b.prequalabr,
           b.pretypabrv,
           b.tfidr,
           b.tfidl,
           b.the_geom,
           b.place
      FROM     b
      ORDER BY 10 ,
           11 DESC limit 20 )



--, foo AS (

    SELECT DISTINCT
         ON (
                         sub.predirabrv,sub.fename,coalesce(sub.suftypabrv,
sub.pretypabrv) ,sub.sufdirabrv,sub.place,s.stusps,sub.zip) sub.predirabrv
AS fedirp,
                 sub.fename,
                 coalesce(sub.suftypabrv, sub.pretypabrv) AS fetype,
                 sub.sufdirabrv                           AS fedirs,
                 sub.place ,

 s.stusps
AS state,

 sub.zip
AS zip,
                 interpolate_from_address('1', sub.fromhn, sub.tohn,
sub.the_geom,
sub.side)
AS address_geom,
                 (sub.sub_rating + least(coalesce(diff_zip('01104' ,
sub.zip),0), 20)*2.00 + coalesce(levenshtein_ignore_case('SPRINGFIELD',
sub.place),5) )::INTEGER AS sub_rating,

 sub.exact_address
AS exact_address,
                 sub.tohn,
                 sub.fromhn
         FROM           sub
         join            tiger.state s
         ON              (
                         '25' = s.statefp)
         ORDER BY        1,
                 2,
                 3,
                 4,
                 5,
                 6,
                 7,
                 9 limit 20)

SELECT   *
FROM foo
ORDER BY sub_rating,
         exact_address DESC limit 100
-------------------------------------
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.osgeo.org/pipermail/postgis-devel/attachments/20160616/b5ece40f/attachment.html>


More information about the postgis-devel mailing list