[SCM] PostGIS branch master updated. 3.6.0rc2-383-gbd6ef65e8

git at osgeo.org git at osgeo.org
Mon Mar 9 13:27:31 PDT 2026


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".

The branch, master has been updated
       via  bd6ef65e8d418d17064225b182347478ebe84b11 (commit)
      from  08054a171b90f96bc4ad4ba89ada81c4709cb141 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit bd6ef65e8d418d17064225b182347478ebe84b11
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Tue Mar 10 00:25:07 2026 +0400

    #2459 Improve tiger geocoder wrapper normalization
    
    Speed up pagc_normalize_address with a single standardize_address() call while preserving ZIP+4, country, and ZIP-only fallbacks. Convert several expression-only tiger wrappers to SQL where install order allows, preserve extension schema qualifications, and refresh the related docs and regression coverage.
    
    Closes #2459

diff --git a/NEWS b/NEWS
index c6d77cc77..6d6847db0 100644
--- a/NEWS
+++ b/NEWS
@@ -41,9 +41,16 @@ This version requires GEOS 3.10 or higher
  - #4828, geometry_columns handles NOT VALID SRID checks without errors (Darafei Praliaskouski)
  - #6048, [raster] ST_Clip no longer crashes when clipping sparse band
           selections (Darafei Praliaskouski)
+ - #2459, [tiger geocoder] Speed up pagc_normalize_address by avoiding
+          redundant wrapper parsing while preserving ZIP+4 and country
+          details from the structured parser (Darafei Praliaskouski)
+ - [tiger geocoder] Replace several expression-only PL/pgSQL wrappers with
+          SQL functions in helper and text geocode entry points while
+          preserving extension schema qualifications
+          (Darafei Praliaskouski)
  - #1461, [tiger geocoder] geocode_intersection now tolerates irregular
           spacing in highway names such as `I- 635` vs `I-635`
-          (OpenAI)
+          (Darafei Praliaskouski)
  - #1599, [address_standardizer] parse_address() and normalize_address() now
           canonicalize trailing country tokens to ISO 3166-1 alpha-2 codes and
           expose country on normalized addresses
diff --git a/doc/extras_tigergeocoder.xml b/doc/extras_tigergeocoder.xml
index 2889f6286..ec4906a2e 100644
--- a/doc/extras_tigergeocoder.xml
+++ b/doc/extras_tigergeocoder.xml
@@ -1104,9 +1104,7 @@ FROM pagc_normalize_address('9000 E ROO ST STE 999, Springfield, CO') AS addy;
 ---------+--------------+------------+------------------+---------------+-----------+-------------+-------------+-----+--------
     9000 | E            | ROO        | ST               |               | SUITE 999 | SPRINGFIELD | CO          |     | t</programlisting>
 
-            <para>Batch call.  There are currently speed issues with the way postgis_tiger_geocoder wraps the address_standardizer.  These will hopefully
-be resolved in later editions.  To work around them, if you need speed for batch geocoding to call generate a normaddy  in batch mode, you are encouraged
-to directly call the address_standardizer standardize_address function as shown below which is similar exercise to what we did in <xref linkend="Normalize_Address"/> that uses data created in <xref linkend="Geocode"/>.</para>
+            <para>Batch call. <function>pagc_normalize_address</function> now follows the same single-address standardizer path internally, but for large set-based geocoding jobs you may still prefer to call <function>standardize_address</function> directly when materializing <varname>norm_addy</varname> rows in SQL, as shown below. This is similar to the exercise in <xref linkend="Normalize_Address"/> that uses data created in <xref linkend="Geocode"/>.</para>
 
     <programlisting>WITH g AS (SELECT address, ROW((sa).house_num, (sa).predir, (sa).name
   , (sa).suftype, (sa).sufdir, (sa).unit , (sa).city, (sa).state, (sa).postcode, true)::norm_addy As na
diff --git a/extensions/address_standardizer/address_standardizer.c b/extensions/address_standardizer/address_standardizer.c
index 5adb43666..7eee099b0 100644
--- a/extensions/address_standardizer/address_standardizer.c
+++ b/extensions/address_standardizer/address_standardizer.c
@@ -1,3 +1,7 @@
+/*
+ * Copyright (c) 2026 Darafei Praliaskouski <me at komzpa.net>
+ */
+
 #include "postgres.h"
 #include "fmgr.h"
 #include "funcapi.h"
@@ -496,6 +500,22 @@ parse_address_wrapper_input(const char *function_name, const char *raw_address,
 	return parsed_address;
 }
 
+/*
+ * Preserve ZIP+4 details from structured parses so callers can reuse the
+ * standardized postcode without reparsing the original address text.
+ */
+static char *
+build_postcode_component(const ADDRESS *parsed_address)
+{
+	if (!parsed_address || !parsed_address->zip || parsed_address->zip[0] == '\0')
+		return NULL;
+
+	if (parsed_address->zipplus && parsed_address->zipplus[0] != '\0')
+		return psprintf("%s-%s", parsed_address->zip, parsed_address->zipplus);
+
+	return pstrdup(parsed_address->zip);
+}
+
 /*
  * The explicit SQL macro argument models city/state/postcode text, not a full
  * address. parseaddress() mostly gives us that split already, but when a city
@@ -523,6 +543,10 @@ parse_macro_input(const char *function_name, const char *raw_macro)
 	return parsed_macro;
 }
 
+/*
+ * Diagnostic entry point that returns tokenization, rule matching, and the
+ * final standardized address for either split or one-line input.
+ */
 PG_FUNCTION_INFO_V1(debug_standardize_address);
 
 Datum
@@ -751,6 +775,7 @@ standardize_address(PG_FUNCTION_ARGS)
 	char **values;
 	HeapTuple tuple;
 	ADDRESS *parsed_macro = NULL;
+	char *postcode = NULL;
 
 	DBG("Start standardize_address");
 
@@ -777,9 +802,10 @@ standardize_address(PG_FUNCTION_ARGS)
 
 	if (parsed_macro)
 	{
+		postcode = build_postcode_component(parsed_macro);
 		DBG("calling std_standardize('%s', ... parsed macro)", micro);
 		stdaddr = std_standardize(
-		    std, micro, parsed_macro->city, parsed_macro->st, parsed_macro->zip, parsed_macro->cc, 0);
+		    std, micro, parsed_macro->city, parsed_macro->st, postcode, parsed_macro->cc, 0);
 	}
 	else
 	{
@@ -806,6 +832,10 @@ standardize_address(PG_FUNCTION_ARGS)
 	PG_RETURN_DATUM(result);
 }
 
+/*
+ * One-line standardization entry point. Parse the raw address first so the
+ * structured postcode and country can be preserved during normalization.
+ */
 PG_FUNCTION_INFO_V1(standardize_address1);
 
 Datum
@@ -824,6 +854,7 @@ standardize_address1(PG_FUNCTION_ARGS)
 	char **values;
 	HeapTuple tuple;
 	ADDRESS *parsed_address;
+	char *postcode;
 
 	DBG("Start standardize_address");
 
@@ -840,6 +871,7 @@ standardize_address1(PG_FUNCTION_ARGS)
 	attinmeta = TupleDescGetAttInMetadata(tuple_desc);
 
 	parsed_address = parse_address_wrapper_input(__func__, addr, &micro, NULL);
+	postcode = build_postcode_component(parsed_address);
 
 	DBG("calling GetStdUsingFCInfo(fcinfo, '%s', '%s', '%s')", lextab, gaztab, rultab);
 	std = GetStdUsingFCInfo(fcinfo, lextab, gaztab, rultab);
@@ -848,7 +880,7 @@ standardize_address1(PG_FUNCTION_ARGS)
 
 	DBG("calling std_standardize('%s', ...)", micro);
 	stdaddr = std_standardize(
-	    std, micro, parsed_address->city, parsed_address->st, parsed_address->zip, parsed_address->cc, 0);
+	    std, micro, parsed_address->city, parsed_address->st, postcode, parsed_address->cc, 0);
 
 	DBG("back from fetch_stdaddr");
 
diff --git a/extensions/address_standardizer/expected/test-standardize_address_1.out b/extensions/address_standardizer/expected/test-standardize_address_1.out
index ff6dbd83d..849732c2e 100644
--- a/extensions/address_standardizer/expected/test-standardize_address_1.out
+++ b/extensions/address_standardizer/expected/test-standardize_address_1.out
@@ -58,6 +58,18 @@ SELECT '#5695a' AS ticket, * FROM standardize_address('us_lex', 'us_gaz', 'us_ru
  #5695a |          | 1         | EAST   |      |         | PIMA | STREET  |        |            |       | TUCSON | ARIZONA | US      |          |     | SUITE 999
 (1 row)
 
+SELECT '#2459a' AS ticket, * FROM standardize_address('us_lex', 'us_gaz', 'us_rules', '26 Court Street, Boston, Massachusetts 02109, France');
+ ticket | building | house_num | predir | qual | pretype | name  | suftype | sufdir | ruralroute | extra |  city  |     state     | country | postcode | box | unit 
+--------+----------+-----------+--------+------+---------+-------+---------+--------+------------+-------+--------+---------------+---------+----------+-----+------
+ #2459a |          | 26        |        |      |         | COURT | STREET  |        |            |       | BOSTON | MASSACHUSETTS | FR      | 02109    |     | 
+(1 row)
+
+SELECT '#2459b' AS ticket, * FROM standardize_address('us_lex', 'us_gaz', 'us_rules', '212 3rd Ave N, MINNEAPOLIS, MN 553404');
+ ticket | building | house_num | predir | qual | pretype | name | suftype | sufdir | ruralroute | extra |    city     |   state   | country | postcode | box | unit 
+--------+----------+-----------+--------+------+---------+------+---------+--------+------------+-------+-------------+-----------+---------+----------+-----+------
+ #2459b |          | 212       |        |      |         | 3    | AVENUE  | NORTH  |            |       | MINNEAPOLIS | MINNESOTA | US      | 55340-4  |     | 
+(1 row)
+
 DO $$
 BEGIN
 	PERFORM standardize_address('us_lex', 'us_gaz', 'us_rules', '   ');
diff --git a/extensions/address_standardizer/standard.c b/extensions/address_standardizer/standard.c
index fbd35a319..f88e8af31 100644
--- a/extensions/address_standardizer/standard.c
+++ b/extensions/address_standardizer/standard.c
@@ -7,6 +7,7 @@ Prototype 7H08 (This file was written by Walter Sinclair).
 This file is part of PAGC.
 
 Copyright (c) 2009 Walter Bruce Sinclair
+Copyright (c) 2026 Darafei Praliaskouski <me at komzpa.net>
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 
@@ -452,31 +453,53 @@ STDADDR *std_standardize_one(STANDARDIZER *std, char *address_one_line, int opti
 }
 */
 
+/* Replace one STDADDR field with a caller-supplied structured component. */
+static void
+replace_stdaddr_component(char **field, const char *value)
+{
+	if (!field)
+		return;
+
+	if (*field)
+	{
+		free(*field);
+		*field = NULL;
+	}
+
+	if (value && value[0] != '\0')
+		*field = strdup(value);
+}
+
 /*
  * The string-based analyzer can collapse a supplied state into the city slot
- * when there is no city component. When the caller provides structured macro
- * components, preserve that split in the final STDADDR.
+ * when there is no city component. When callers provide structured city/state/
+ * postcode/country pieces, restore those values onto the final STDADDR.
  */
 static void
-apply_component_split_to_stdaddr(STDADDR *stdaddr, const char *city, const char *state)
+apply_component_values_to_stdaddr(
+    STDADDR *stdaddr, const char *city, const char *state, const char *postcode, const char *country)
 {
 	if (!stdaddr)
 		return;
 
-	if ((city && city[0] != '\0') || !state || state[0] == '\0')
-		return;
-
-	if (stdaddr->state && stdaddr->state[0] != '\0')
-		return;
-
-	if (stdaddr->city && stdaddr->city[0] != '\0')
+	if ((!city || city[0] == '\0') && state && state[0] != '\0' && (!stdaddr->state || stdaddr->state[0] == '\0'))
 	{
-		stdaddr->state = stdaddr->city;
-		stdaddr->city = NULL;
-		return;
+		if (stdaddr->city && stdaddr->city[0] != '\0')
+		{
+			stdaddr->state = stdaddr->city;
+			stdaddr->city = NULL;
+		}
+		else
+		{
+			stdaddr->state = strdup(state);
+		}
 	}
 
-	stdaddr->state = strdup(state);
+	if (postcode && postcode[0] != '\0')
+		replace_stdaddr_component(&stdaddr->postcode, postcode);
+
+	if (country && country[0] != '\0')
+		replace_stdaddr_component(&stdaddr->country, country);
 }
 
 /*
@@ -617,7 +640,7 @@ std_standardize(STANDARDIZER *std,
 	if (macro)
 		free(macro);
 
-	apply_component_split_to_stdaddr(stdaddr, city, state);
+	apply_component_values_to_stdaddr(stdaddr, city, state, postcode, country);
 	return stdaddr;
 }
 
diff --git a/extensions/address_standardizer/test-standardize_address_1.sql.in b/extensions/address_standardizer/test-standardize_address_1.sql.in
index b67017e57..7084c9384 100644
--- a/extensions/address_standardizer/test-standardize_address_1.sql.in
+++ b/extensions/address_standardizer/test-standardize_address_1.sql.in
@@ -8,6 +8,8 @@ SELECT '#state_only_macro' AS ticket, * FROM standardize_address('us_lex','us_ga
 SELECT '#5299a' AS ticket, * FROM standardize_address('us_lex',  'us_gaz', 'us_rules','1 Timepiece Point','Boston, MA, 02220');
 SELECT '#5299b' AS ticket, * FROM standardize_address('us_lex',  'us_gaz', 'us_rules','50 Gold Piece Drive','Boston, MA, 02020');
 SELECT '#5695a' AS ticket, * FROM standardize_address('us_lex', 'us_gaz', 'us_rules', 'ONE E PIMA ST STE 999, TUCSON, AZ');
+SELECT '#2459a' AS ticket, * FROM standardize_address('us_lex', 'us_gaz', 'us_rules', '26 Court Street, Boston, Massachusetts 02109, France');
+SELECT '#2459b' AS ticket, * FROM standardize_address('us_lex', 'us_gaz', 'us_rules', '212 3rd Ave N, MINNEAPOLIS, MN 553404');
 DO $$
 BEGIN
 	PERFORM standardize_address('us_lex', 'us_gaz', 'us_rules', '   ');
diff --git a/extensions/postgis_tiger_geocoder/Makefile.in b/extensions/postgis_tiger_geocoder/Makefile.in
index c103b3069..a4335489a 100644
--- a/extensions/postgis_tiger_geocoder/Makefile.in
+++ b/extensions/postgis_tiger_geocoder/Makefile.in
@@ -126,11 +126,11 @@ sql/$(EXTENSION)--$(EXTVERSION)next--$(EXTVERSION).sql: sql/$(EXTENSION)--$(EXTV
 sql/tiger_geocoder_minor.sql.in: ../../extras/tiger_geocoder/utility/set_search_path.sql \
   sql_bits/upgrade_before.sql.in \
   	../../extras/tiger_geocoder/geocode_settings.sql \
-  ../../extras/tiger_geocoder/tiger_loader_2025.sql \
+	../../extras/tiger_geocoder/tiger_loader_2025.sql \
 	../../extras/tiger_geocoder/utility/utmzone.sql \
 	../../extras/tiger_geocoder/utility/cull_null.sql \
-	../../extras/tiger_geocoder/utility/nullable_levenshtein.sql \
 	../../extras/tiger_geocoder/utility/levenshtein_ignore_case.sql \
+	../../extras/tiger_geocoder/utility/nullable_levenshtein.sql \
 	../../extras/tiger_geocoder/normalize/end_soundex.sql \
 	../../extras/tiger_geocoder/normalize/count_words.sql \
 	../../extras/tiger_geocoder/normalize/state_extract.sql \
@@ -166,8 +166,8 @@ sql/tiger_geocoder.sql.in: sql_bits/norm_addy_create.sql.in \
 	../../extras/tiger_geocoder/utility/set_search_path.sql \
 	../../extras/tiger_geocoder/utility/utmzone.sql \
 	../../extras/tiger_geocoder/utility/cull_null.sql \
-	../../extras/tiger_geocoder/utility/nullable_levenshtein.sql \
 	../../extras/tiger_geocoder/utility/levenshtein_ignore_case.sql \
+	../../extras/tiger_geocoder/utility/nullable_levenshtein.sql \
 	../../extras/tiger_geocoder/normalize/end_soundex.sql \
 	../../extras/tiger_geocoder/normalize/count_words.sql \
 	../../extras/tiger_geocoder/normalize/state_extract.sql \
diff --git a/extras/tiger_geocoder/create_geocode.sql b/extras/tiger_geocoder/create_geocode.sql
index daaa98f33..9f48e8766 100644
--- a/extras/tiger_geocoder/create_geocode.sql
+++ b/extras/tiger_geocoder/create_geocode.sql
@@ -39,8 +39,8 @@ CREATE TYPE norm_addy AS (
 -- System/General helper functions
 \i utility/utmzone.sql
 \i utility/cull_null.sql
-\i utility/nullable_levenshtein.sql
 \i utility/levenshtein_ignore_case.sql
+\i utility/nullable_levenshtein.sql
 
 ---- Address normalizer
 -- General helpers
diff --git a/extras/tiger_geocoder/geocode/geocode.sql b/extras/tiger_geocoder/geocode/geocode.sql
index 34df34fd7..b1c547fe8 100644
--- a/extras/tiger_geocoder/geocode/geocode.sql
+++ b/extras/tiger_geocoder/geocode/geocode.sql
@@ -1,42 +1,5 @@
-CREATE OR REPLACE FUNCTION geocode(
-    input VARCHAR, max_results integer DEFAULT 10,
-    restrict_geom geometry DEFAULT NULL,
-    OUT ADDY NORM_ADDY,
-    OUT GEOMOUT GEOMETRY,
-    OUT RATING INTEGER
-) RETURNS SETOF RECORD
-AS $_$
-DECLARE
-  rec RECORD;
-BEGIN
-
-  IF input IS NULL THEN
-    RETURN;
-  END IF;
-
-  -- Pass the input string into the address normalizer
-  ADDY := normalize_address(input);
-  IF NOT ADDY.parsed THEN
-    RETURN;
-  END IF;
-
-/*  FOR rec IN SELECT * FROM geocode(ADDY)
-  LOOP
-
-    ADDY := rec.addy;
-    GEOMOUT := rec.geomout;
-    RATING := rec.rating;
-
-    RETURN NEXT;
-  END LOOP;*/
-
-  RETURN QUERY SELECT g.addy, g.geomout, g.rating FROM geocode(ADDY, max_results, restrict_geom) As g ORDER BY g.rating;
-
-END;
-$_$ LANGUAGE plpgsql COST 1000
-STABLE PARALLEL SAFE
-ROWS 1;
-
+-- Geocode an already parsed address, preferring precise address matches and
+-- falling back to broader location matches when needed.
 CREATE OR REPLACE FUNCTION geocode(
     IN_ADDY NORM_ADDY,
     max_results integer DEFAULT 10,
@@ -131,3 +94,30 @@ $_$ LANGUAGE plpgsql
 COST 1000
 STABLE PARALLEL SAFE
 ROWS 1;
+
+-- Normalize free-form input once, then only delegate to the structured
+-- geocoder overload when parsing succeeds.
+CREATE OR REPLACE FUNCTION geocode(
+    input VARCHAR, max_results integer DEFAULT 10,
+    restrict_geom geometry DEFAULT NULL,
+    OUT ADDY NORM_ADDY,
+    OUT GEOMOUT GEOMETRY,
+    OUT RATING INTEGER
+) RETURNS SETOF RECORD
+AS $$
+  WITH normalized AS (
+      SELECT @extschema at .normalize_address(input) AS addy
+      WHERE input IS NOT NULL
+  ),
+  parsed AS (
+      SELECT addy
+      FROM normalized
+      WHERE (addy).parsed
+  )
+  SELECT g.addy, g.geomout, g.rating
+  FROM parsed
+    CROSS JOIN LATERAL @extschema at .geocode(addy, max_results, restrict_geom) AS g
+  ORDER BY g.rating;
+$$ LANGUAGE sql COST 1000
+STABLE PARALLEL SAFE
+ROWS 1;
diff --git a/extras/tiger_geocoder/geocode/rate_attributes.sql b/extras/tiger_geocoder/geocode/rate_attributes.sql
index 1949dcbde..1e19be911 100644
--- a/extras/tiger_geocoder/geocode/rate_attributes.sql
+++ b/extras/tiger_geocoder/geocode/rate_attributes.sql
@@ -1,31 +1,3 @@
--- rate_attributes(dirpA, dirpB, streetNameA, streetNameB, streetTypeA,
--- streetTypeB, dirsA, dirsB, locationA, locationB)
--- Rates the street based on the given attributes.  The locations must be
--- non-null.  The other eight values are handled by the other rate_attributes
--- function, so it's requirements must also be met.
--- changed: 2010-10-18 Regina Obe - all references to verbose to var_verbose since causes compile errors in 9.0
--- changed: 2011-06-25 revise to use real named args and fix direction rating typo
-CREATE OR REPLACE FUNCTION rate_attributes(dirpA VARCHAR, dirpB VARCHAR, streetNameA VARCHAR, streetNameB VARCHAR,
-    streetTypeA VARCHAR, streetTypeB VARCHAR, dirsA VARCHAR, dirsB VARCHAR,  locationA VARCHAR, locationB VARCHAR, prequalabr VARCHAR) RETURNS INTEGER
-AS $_$
-DECLARE
-  result INTEGER := 0;
-  locationWeight INTEGER := 14;
-  var_verbose BOOLEAN := FALSE;
-BEGIN
-  IF locationA IS NOT NULL AND locationB IS NOT NULL THEN
-    result := tiger.levenshtein_ignore_case(locationA, locationB);
-  ELSE
-    IF var_verbose THEN
-      RAISE NOTICE 'rate_attributes() - Location names cannot be null!';
-    END IF;
-    RETURN NULL;
-  END IF;
-  result := result + rate_attributes($1, $2, streetNameA, streetNameB, $5, $6, $7, $8,prequalabr);
-  RETURN result;
-END;
-$_$ LANGUAGE plpgsql IMMUTABLE;
-
 -- rate_attributes(dirpA, dirpB, streetNameA, streetNameB, streetTypeA,
 -- streetTypeB, dirsA, dirsB)
 -- Rates the street based on the given attributes.  Only streetNames are
@@ -76,3 +48,20 @@ BEGIN
   return result;
 END;
 $_$ LANGUAGE plpgsql IMMUTABLE;
+
+-- rate_attributes(dirpA, dirpB, streetNameA, streetNameB, streetTypeA,
+-- streetTypeB, dirsA, dirsB, locationA, locationB)
+-- Rates the street based on the given attributes.  The locations must be
+-- non-null.  The other eight values are handled by the other rate_attributes
+-- function, so it's requirements must also be met.
+-- changed: 2010-10-18 Regina Obe - all references to verbose to var_verbose since causes compile errors in 9.0
+-- changed: 2011-06-25 revise to use real named args and fix direction rating typo
+CREATE OR REPLACE FUNCTION rate_attributes(dirpA VARCHAR, dirpB VARCHAR, streetNameA VARCHAR, streetNameB VARCHAR,
+    streetTypeA VARCHAR, streetTypeB VARCHAR, dirsA VARCHAR, dirsB VARCHAR,  locationA VARCHAR, locationB VARCHAR, prequalabr VARCHAR) RETURNS INTEGER
+AS $$
+  SELECT CASE
+      WHEN locationA IS NULL OR locationB IS NULL THEN NULL
+      ELSE @extschema at .levenshtein_ignore_case(locationA, locationB)
+         + @extschema at .rate_attributes($1, $2, streetNameA, streetNameB, $5, $6, $7, $8, prequalabr)
+  END;
+$$ LANGUAGE sql IMMUTABLE;
diff --git a/extras/tiger_geocoder/normalize/end_soundex.sql b/extras/tiger_geocoder/normalize/end_soundex.sql
index 36a8bda56..be9e71c6a 100644
--- a/extras/tiger_geocoder/normalize/end_soundex.sql
+++ b/extras/tiger_geocoder/normalize/end_soundex.sql
@@ -2,16 +2,8 @@
 -- Words are allowed to be separated by space, comma, period, new-line
 -- tab or form feed.
 CREATE OR REPLACE FUNCTION end_soundex(VARCHAR) RETURNS VARCHAR
-AS $_$
-DECLARE
-  tempString VARCHAR;
-BEGIN
-  tempString := substring($1, E'[ ,.\n\t\f]([a-zA-Z0-9]*)$');
-  IF tempString IS NOT NULL THEN
-    tempString := @extschema:fuzzystrmatch at .soundex(tempString);
-  ELSE
-    tempString := @extschema:fuzzystrmatch at .soundex($1);
-  END IF;
-  return tempString;
-END;
-$_$ LANGUAGE plpgsql IMMUTABLE;
+AS $$
+  SELECT @extschema:fuzzystrmatch at .soundex(
+      COALESCE(substring($1, E'[ ,.\n\t\f]([a-zA-Z0-9]*)$'), $1)
+  );
+$$ LANGUAGE sql IMMUTABLE;
diff --git a/extras/tiger_geocoder/normalize/location_extract_place_exact.sql b/extras/tiger_geocoder/normalize/location_extract_place_exact.sql
index 1ab610164..373e1e8de 100644
--- a/extras/tiger_geocoder/normalize/location_extract_place_exact.sql
+++ b/extras/tiger_geocoder/normalize/location_extract_place_exact.sql
@@ -7,54 +7,14 @@ CREATE OR REPLACE FUNCTION location_extract_place_exact(
     fullStreet VARCHAR,
     stateAbbrev VARCHAR
 ) RETURNS VARCHAR
-AS $_$
-DECLARE
-  ws VARCHAR;
-  location VARCHAR;
-  tempInt INTEGER;
-  lstate VARCHAR;
-  rec RECORD;
-BEGIN
-  ws := E'[ ,.\n\f\t]';
-
-  -- Try for an exact match against places
-  IF stateAbbrev IS NOT NULL THEN
-    lstate := statefp FROM state WHERE stusps = stateAbbrev;
-    SELECT INTO tempInt count(*) FROM place
-        WHERE place.statefp = lstate AND fullStreet ILIKE '%' || name || '%'
-        AND texticregexeq(fullStreet, '(?i)' || name || '$');
-  ELSE
-    SELECT INTO tempInt count(*) FROM place
-        WHERE fullStreet ILIKE '%' || name || '%' AND
-        	texticregexeq(fullStreet, '(?i)' || name || '$');
-  END IF;
-
-  IF tempInt > 0 THEN
-    -- Some matches were found.  Look for the last one in the string.
-    IF stateAbbrev IS NOT NULL THEN
-      FOR rec IN SELECT substring(fullStreet, '(?i)('
-          || name || ')$') AS value, name FROM place
-          WHERE place.statefp = lstate AND fullStreet ILIKE '%' || name || '%'
-          AND texticregexeq(fullStreet, '(?i)'
-          || name || '$') ORDER BY length(name) DESC LOOP
-        -- Since the regex is end of string, only the longest (first) result
-        -- is useful.
-        location := rec.value;
-        EXIT;
-      END LOOP;
-    ELSE
-      FOR rec IN SELECT substring(fullStreet, '(?i)('
-          || name || ')$') AS value, name FROM place
-          WHERE fullStreet ILIKE '%' || name || '%' AND texticregexeq(fullStreet, '(?i)'
-          || name || '$') ORDER BY length(name) DESC LOOP
-        -- Since the regex is end of string, only the longest (first) result
-        -- is useful.
-        location := rec.value;
-        EXIT;
-      END LOOP;
-    END IF;
-  END IF;
-
-  RETURN location;
-END;
-$_$ LANGUAGE plpgsql STABLE COST 100;
+AS $$
+  SELECT (
+      SELECT substring(fullStreet, '(?i)(' || name || ')$')
+      FROM place
+      WHERE (stateAbbrev IS NULL OR place.statefp = (SELECT statefp FROM state WHERE stusps = stateAbbrev))
+        AND fullStreet ILIKE '%' || name || '%'
+        AND texticregexeq(fullStreet, '(?i)' || name || '$')
+      ORDER BY length(name) DESC
+      LIMIT 1
+  );
+$$ LANGUAGE sql STABLE COST 100;
diff --git a/extras/tiger_geocoder/pagc_normalize/pagc_normalize_address.sql b/extras/tiger_geocoder/pagc_normalize/pagc_normalize_address.sql
index befce7643..15d0734fd 100644
--- a/extras/tiger_geocoder/pagc_normalize/pagc_normalize_address.sql
+++ b/extras/tiger_geocoder/pagc_normalize/pagc_normalize_address.sql
@@ -4,51 +4,51 @@
 -- zip code, depending on what can be found in the string.
 -- This is a drop in replacement for packaged normalize_address
 -- that uses the pagc address standardizer C library instead
+-- Keep the wrapper in plpgsql so the optional address_standardizer extension
+-- is only resolved when PAGC parsing is invoked, while still using a single
+-- standardize_address() call to preserve the wrapper speedup work.
 -- USAGE: SELECT * FROM tiger.pagc_normalize_address('One Devonshire Place, PH 301, Boston, MA 02109');
 SELECT tiger.SetSearchPathForInstall('tiger');
 CREATE OR REPLACE FUNCTION pagc_normalize_address(in_rawinput character varying)
   RETURNS norm_addy AS
 $$
 DECLARE
-  result norm_addy;
-  var_rec RECORD;
-  var_parse_rec RECORD;
-  rawInput VARCHAR;
-
+  sa RECORD;
+  parsed RECORD;
+  raw_input VARCHAR;
 BEGIN
-  result.parsed := FALSE;
+  raw_input := trim(in_rawinput);
 
-  rawInput := trim(in_rawinput);
-  var_parse_rec := parse_address(rawInput);
+  -- Preserve parse_address()'s macro-only ZIP and country detection for
+  -- non-street inputs while keeping a single standardize_address() call.
+  SELECT *
+    INTO parsed
+  FROM parse_address(raw_input);
 
-  result.location := var_parse_rec.city;
-  result.stateAbbrev := trim(var_parse_rec.state);
-  result.zip := var_parse_rec.zip;
-  result.zip4 := NULLIF(var_parse_rec.zipplus,'');
-  result.country := NULLIF(var_parse_rec.country,'');
+  SELECT *
+    INTO sa
+  FROM standardize_address(
+      'pagc_lex',
+      'pagc_gaz',
+      'pagc_rules',
+      raw_input
+  ) AS sa;
 
- var_rec := standardize_address('pagc_lex'
-       , 'pagc_gaz'
-       , 'pagc_rules'
-, COALESCE(var_parse_rec.address1,''),
-   COALESCE(var_parse_rec.city,'') || COALESCE(', ' || var_parse_rec.state, '') || COALESCE(' ' || var_parse_rec.zip,'')  ) ;
-
- -- For address number only put numbers and stop if reach a non-number e.g. 123-456 will return 123
-  result.address := to_number(substring(var_rec.house_num, '[0-9]+'), '99999999');
-  result.address_alphanumeric := var_rec.house_num;
-   --get rid of extraneous spaces before we return
-  result.zip := COALESCE(var_rec.postcode,result.zip);
-  result.streetName := trim(var_rec.name);
-  result.location := trim(var_rec.city);
-  result.stateAbbrev := trim(var_rec.state);
-  result.country := COALESCE(NULLIF(var_rec.country,''), result.country);
-  --this should be broken out separately like pagc, but normalizer doesn't have a slot for it
-  result.streettypeAbbrev := trim(COALESCE(var_rec.suftype, var_rec.pretype));
-  result.preDirAbbrev := trim(var_rec.predir);
-  result.postDirAbbrev := trim(var_rec.sufdir);
-  result.internal := trim(regexp_replace(replace(var_rec.unit, '#',''), '([0-9]+)\s+([A-Za-z]){0,1}', E'\\1\\2'));
-  result.parsed := TRUE;
-  RETURN result;
+  RETURN ROW(
+      to_number(substring(sa.house_num, '[0-9]+'), '99999999'),
+      NULLIF(trim(sa.predir), ''),
+      NULLIF(trim(sa.name), ''),
+      NULLIF(trim(COALESCE(sa.suftype, sa.pretype)), ''),
+      NULLIF(trim(sa.sufdir), ''),
+      NULLIF(trim(regexp_replace(replace(COALESCE(sa.unit, ''), '#', ''), '([0-9]+)\s+([A-Za-z]){0,1}', E'\\1\\2')), ''),
+      COALESCE(NULLIF(trim(sa.city), ''), NULLIF(trim(parsed.city), '')),
+      COALESCE(NULLIF(trim(sa.state), ''), NULLIF(trim(parsed.state), '')),
+      COALESCE(NULLIF(split_part(COALESCE(sa.postcode, ''), '-', 1), ''), NULLIF(parsed.zip, '')),
+      TRUE,
+      COALESCE(NULLIF(split_part(COALESCE(sa.postcode, ''), '-', 2), ''), NULLIF(parsed.zipplus, '')),
+      NULLIF(sa.house_num, ''),
+      COALESCE(NULLIF(trim(sa.country), ''), NULLIF(trim(parsed.country), ''), 'US')
+  )::norm_addy;
 END
 $$
   LANGUAGE plpgsql IMMUTABLE STRICT
diff --git a/extras/tiger_geocoder/regress/normalize_address_regress b/extras/tiger_geocoder/regress/normalize_address_regress
index 676bb7698..4814836f0 100644
--- a/extras/tiger_geocoder/regress/normalize_address_regress
+++ b/extras/tiger_geocoder/regress/normalize_address_regress
@@ -49,3 +49,12 @@
 #1614a|8435 Co Rd 20 SE, ROCHESTER, MN 55904, US|8435||20|Co Rd|SE||ROCHESTER|MN|55904|t||8435|US
 #1614b|3208 US Hwy 52, Rochester, MN 55901, US|3208||52|US Hwy|||Rochester|MN|55901|t||3208|US
 #1108a|529 Main St, Suite 201, Boston, MA 02129, US|529||Main|St||Suite 201|Boston|MA|02129|t||529|US
+#sqlify1|S363|S363|
+#sqlify2||3|3|1
+#sqlify3|0|
+#sqlify4|
+#sqlify5|0
+#sqlify6|t
+#sqlify7|t
+#sqlify8|t|t
+#sqlify9|t|t
diff --git a/extras/tiger_geocoder/regress/normalize_address_regress.sql b/extras/tiger_geocoder/regress/normalize_address_regress.sql
index 46749542e..1f99fcba2 100644
--- a/extras/tiger_geocoder/regress/normalize_address_regress.sql
+++ b/extras/tiger_geocoder/regress/normalize_address_regress.sql
@@ -102,4 +102,39 @@ SELECT '#1614b' As ticket, pprint_addy(addy), addy.* FROM normalize_address('320
 
 --internal address prefix sometimes get caught in post dir
 SELECT '#1108a' As ticket, pprint_addy(addy), addy.* FROM normalize_address('529 Main Street, Suite 201, Boston, MA 02129') AS addy;
+
+-- direct helper coverage for SQL wrappers
+SELECT '#sqlify1' As ticket, end_soundex('123 Main Street'), end_soundex('Street'), end_soundex(NULL::varchar);
+SELECT '#sqlify2' As ticket, nullable_levenshtein(NULL::varchar, 'foo'), nullable_levenshtein('foo', NULL::varchar), nullable_levenshtein('foo', ''), nullable_levenshtein('foo', 'food');
+SELECT '#sqlify3' As ticket, rate_attributes('N','N','MAIN','MAIN','ST','ST',NULL,NULL,'BOSTON','BOSTON',NULL), rate_attributes('N','N','MAIN','MAIN','ST','ST',NULL,NULL,NULL,'BOSTON',NULL);
+SELECT '#sqlify4' As ticket, location_extract_place_exact('999 Main Street, Boston', 'MA');
+SELECT '#sqlify5' As ticket, count(*) FROM geocode(NULL::varchar, 1) AS g;
+SELECT '#sqlify6' As ticket, strpos(
+  pg_get_functiondef(to_regprocedure((SELECT n.nspname || '.nullable_levenshtein(character varying,character varying)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder'))),
+  (SELECT n.nspname || '.levenshtein_ignore_case' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder')
+) > 0;
+SELECT '#sqlify7' As ticket, CASE
+  WHEN current_setting('server_version_num')::integer >= 160000 THEN strpos(
+    pg_get_functiondef(to_regprocedure((SELECT n.nspname || '.end_soundex(character varying)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder'))),
+    (SELECT n.nspname || '.soundex' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'fuzzystrmatch')
+  ) > 0
+  ELSE strpos(
+    pg_get_functiondef(to_regprocedure((SELECT n.nspname || '.end_soundex(character varying)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder'))),
+    'soundex('
+  ) > 0
+END;
+SELECT '#sqlify8' As ticket, strpos(
+  pg_get_functiondef(to_regprocedure((SELECT n.nspname || '.geocode(character varying,integer,geometry)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder'))),
+  (SELECT n.nspname || '.normalize_address' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder')
+) > 0, strpos(
+  pg_get_functiondef(to_regprocedure((SELECT n.nspname || '.geocode(character varying,integer,geometry)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder'))),
+  (SELECT n.nspname || '.geocode(addy, max_results, restrict_geom)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder')
+) > 0;
+SELECT '#sqlify9' As ticket, strpos(
+  pg_get_functiondef(to_regprocedure((SELECT n.nspname || '.rate_attributes(character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder'))),
+  (SELECT n.nspname || '.levenshtein_ignore_case' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder')
+) > 0, strpos(
+  pg_get_functiondef(to_regprocedure((SELECT n.nspname || '.rate_attributes(character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying,character varying)' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder'))),
+  (SELECT n.nspname || '.rate_attributes(' FROM pg_extension e JOIN pg_namespace n ON n.oid = e.extnamespace WHERE e.extname = 'postgis_tiger_geocoder')
+) > 0;
 --\timing
diff --git a/extras/tiger_geocoder/regress/pagc_normalize_address_regress b/extras/tiger_geocoder/regress/pagc_normalize_address_regress
index 9ea5fefd6..507db6bfe 100644
--- a/extras/tiger_geocoder/regress/pagc_normalize_address_regress
+++ b/extras/tiger_geocoder/regress/pagc_normalize_address_regress
@@ -42,9 +42,9 @@ true
 #1125a|19596 CO RD 480, COLCORD, OK 74338, US
 #1125b|4345 353 RTE, SALAMANCA, NY 14779, US|4345||353|RTE|||SALAMANCA|NY|14779|t||4345|US
 #1125c|19799 STATE RTE 0, COSBY, MO 64436, US|19799||0|STATE RTE|||COSBY|MO|64436|t||19799|US
-#1125d|||||||||||t|||US
-#1125e|||||||||||t|||US
-#1125f|||||||||||t|||US
+#1125d|Boston, MA, US|||||||Boston|MA||t|||US
+#1125e|Boston, MA, US|||||||Boston|MA||t|||US
+#1125f|Boston, MA, US|||||||Boston|MA||t|||US
 #1310a|1110 W CAPITOL AVE, WEST SACRAMENTO, CA, US|1110|W|CAPITOL|AVE|||WEST SACRAMENTO|CA||t||1110|US
 #1614a|8435 CO RD 20 SE, ROCHESTER, MN 55904, US|8435||20|CO RD|SE||ROCHESTER|MN|55904|t||8435|US
 #1614b|3208 US RTE 52, ROCHESTER, MN 55901, US|3208||52|US RTE|||ROCHESTER|MN|55901|t||3208|US
@@ -55,4 +55,8 @@ true
 #2978a|10||DORRANCE|ST|||PROVIDENCE|RI||t||10 20|US
 #2978b|10||DORRANCE|ST|||PROVIDENCE|RI||t||10 20|US
 #2978c|10||DORRANCE|ST|||PROVIDENCE|RI||t||10 20|US
+#2459|1000|S|FREMONT|AVE|||ALHAMBRA|CA|91803|t||1000|US
+#2459zipa|||||||||02109|t|||US
+#2459zipb|||||||||02109|t|1234||US
+#2459zipc|||||||Minneapolis|MN|55401|t|||US
 false
diff --git a/extras/tiger_geocoder/regress/pagc_normalize_address_regress.sql b/extras/tiger_geocoder/regress/pagc_normalize_address_regress.sql
index 783732fce..cf2a11b07 100644
--- a/extras/tiger_geocoder/regress/pagc_normalize_address_regress.sql
+++ b/extras/tiger_geocoder/regress/pagc_normalize_address_regress.sql
@@ -112,6 +112,10 @@ SELECT '#2981' As ticket, * FROM normalize_address('1566 NEW STATE HWY, RAYNHAM,
 SELECT '#2978a' As ticket, * FROM normalize_address('10-20 DORRANCE ST PROVIDENCE RI' ) ;
 SELECT '#2978b' As ticket, * FROM normalize_address('10 20 DORRANCE ST PROVIDENCE RI' ) ;
 SELECT '#2978c' As ticket, * FROM normalize_address('10-20 DORRANCE ST, PROVIDENCE. RI' ) ;
+SELECT '#2459' As ticket, * FROM normalize_address(E'1000 S. Fremont Ave.,\nBldg. A-11,\nAlhambra, CA 91803');
+SELECT '#2459zipa' As ticket, * FROM normalize_address('02109');
+SELECT '#2459zipb' As ticket, * FROM normalize_address('02109-1234');
+SELECT '#2459zipc' As ticket, * FROM normalize_address('Minneapolis, MN 55401');
 
 --\timing
 SELECT set_geocode_setting('use_pagc_address_parser', 'false');
diff --git a/extras/tiger_geocoder/upgrade_geocode.sql b/extras/tiger_geocoder/upgrade_geocode.sql
index c98decd9f..775f18a1b 100644
--- a/extras/tiger_geocoder/upgrade_geocode.sql
+++ b/extras/tiger_geocoder/upgrade_geocode.sql
@@ -286,8 +286,8 @@ SELECT create_census_base_tables();
 -- System/General helper functions
 \i utility/utmzone.sql
 \i utility/cull_null.sql
-\i utility/nullable_levenshtein.sql
 \i utility/levenshtein_ignore_case.sql
+\i utility/nullable_levenshtein.sql
 
 ---- Address normalizer
 -- General helpers
diff --git a/extras/tiger_geocoder/utility/nullable_levenshtein.sql b/extras/tiger_geocoder/utility/nullable_levenshtein.sql
index e0ee3beca..bbeff98b1 100644
--- a/extras/tiger_geocoder/utility/nullable_levenshtein.sql
+++ b/extras/tiger_geocoder/utility/nullable_levenshtein.sql
@@ -4,25 +4,10 @@
 -- 3, otherwise it is the levenshtein difference between the two.
 -- Change 2010-10-18 Regina Obe - name verbose to var_verbose since get compile error in PostgreSQL 9.0
 CREATE OR REPLACE FUNCTION nullable_levenshtein(VARCHAR, VARCHAR) RETURNS INTEGER
-AS $_$
-DECLARE
-  given_string VARCHAR;
-  result INTEGER := 3;
-  var_verbose BOOLEAN := FALSE; /**change from verbose to param_verbose since its a keyword and get compile error in 9.0 **/
-BEGIN
-  IF $1 IS NULL THEN
-    IF var_verbose THEN
-      RAISE NOTICE 'nullable_levenshtein - given string is NULL!';
-    END IF;
-    RETURN NULL;
-  ELSE
-    given_string := $1;
-  END IF;
-
-  IF $2 IS NOT NULL AND $2 != '' THEN
-    result := @extschema:fuzzystrmatch at .levenshtein_ignore_case_ignore_case(given_string, $2);
-  END IF;
-
-  RETURN result;
-END
-$_$ LANGUAGE plpgsql IMMUTABLE COST 10;
+AS $$
+  SELECT CASE
+      WHEN $1 IS NULL THEN NULL
+      WHEN $2 IS NOT NULL AND $2 != '' THEN @extschema at .levenshtein_ignore_case($1, $2)
+      ELSE 3
+  END;
+$$ LANGUAGE sql IMMUTABLE COST 10;

-----------------------------------------------------------------------

Summary of changes:
 NEWS                                               |  9 ++-
 doc/extras_tigergeocoder.xml                       |  4 +-
 .../address_standardizer/address_standardizer.c    | 36 ++++++++++-
 .../expected/test-standardize_address_1.out        | 12 ++++
 extensions/address_standardizer/standard.c         | 53 +++++++++++-----
 .../test-standardize_address_1.sql.in              |  2 +
 extensions/postgis_tiger_geocoder/Makefile.in      |  6 +-
 extras/tiger_geocoder/create_geocode.sql           |  2 +-
 extras/tiger_geocoder/geocode/geocode.sql          | 68 +++++++++------------
 extras/tiger_geocoder/geocode/rate_attributes.sql  | 45 ++++++--------
 extras/tiger_geocoder/normalize/end_soundex.sql    | 18 ++----
 .../normalize/location_extract_place_exact.sql     | 62 ++++---------------
 .../pagc_normalize/pagc_normalize_address.sql      | 70 +++++++++++-----------
 .../regress/normalize_address_regress              |  9 +++
 .../regress/normalize_address_regress.sql          | 35 +++++++++++
 .../regress/pagc_normalize_address_regress         | 10 +++-
 .../regress/pagc_normalize_address_regress.sql     |  4 ++
 extras/tiger_geocoder/upgrade_geocode.sql          |  2 +-
 .../utility/nullable_levenshtein.sql               | 29 +++------
 19 files changed, 259 insertions(+), 217 deletions(-)


hooks/post-receive
-- 
PostGIS


More information about the postgis-tickets mailing list