- Log -----------------------------------------------------------------
commit 58cd6af22685f3c250d7c7f7bf458e528dae58ca
Author: Regina Obe <lr at pcorp.us>
Date:   Mon Jun 12 10:24:48 2023 -0400

    References #5397
    - Get rid of unused variable and declaration warnings
    - Revise documentation to show how you can expand the json document

diff --git a/doc/extras_address_standardizer.xml b/doc/extras_address_standardizer.xml
index 9104eac04..6e638e97e 100644
--- a/doc/extras_address_standardizer.xml
+++ b/doc/extras_address_standardizer.xml
@@ -594,6 +594,29 @@ into includes in the future for easier maintenance.</para></listitem>
             <para>For single line addresses use just <varname>micro</varname></para>
             <para>For a two-line address, use a <varname>micro</varname> consisting of the standard first line of a postal address, e.g. <code>house_num street</code>, and a macro consisting of the standard second line of a postal address, e.g. <code>city, state postal_code country</code>.</para>
+            <para>Elements returned in the JSON document are:</para>
+            <variablelist>
+						<varlistentry>
+                            <term>input_tokens</term>
+                            <listitem>
+                                <para>For each word in the input address, returns the position of the word,
+                                    token categorization of the word, and the standard word it is mapped to.
+                                    Note that for some input words, you might get back multiple records because some inputs can be categorized
+                                    as more than one thing. </para>
+                            </listitem>
+						</varlistentry>
+						<varlistentry><term>rules</term>
+							<listitem>
+								<para>The set of rules matching the input and the corresponding score for each. The first rule (highest scoring) is
+                                what is used for standardization.</para>
+							</listitem>
+						</varlistentry>
+						<varlistentry><term>stdaddr</term>
+							<listitem>
+								<para>The standardized address elements that would be returned when running <xref linkend="standardize_address" /></para>
+							</listitem>
+						</varlistentry>
+				</variablelist>
             <!-- use this format if new function -->
         <para>Availability: 3.4.0</para>
@@ -605,82 +628,41 @@ into includes in the future for easier maintenance.</para></listitem>
             <para>Using address_standardizer_data_us extension</para>
             <programlisting>CREATE EXTENSION address_standardizer_data_us; -- only needs to be done once</programlisting>
-        <para>Variant 1: Single line address.  This doesn't work well with non-US addresses</para>
-        <programlisting>SELECT s::jsonb
-            FROM debug_standardize_address('us_lex',
-                'us_gaz', 'us_rules', 'One Devonshire Place, PH 301, Boston, MA 02109') AS s;</programlisting>
-            <screen><![CDATA[{
-  "macro": "Boston,MA,02109,US,",
-  "micro": "One Devonshire Place, PH 301",
-  "rules": [
-    {
-      "no": 0,
-      "score": 0.87625,
-      "raw_score": 0.87625,
-      "rule_string": "0 1 2 17 0 -1 1 5 6 17 17",
-      "rule_tokens": [
-        {
-          "pos": 0,
-          "input-word": "ONE",
-          "input-token": "NUMBER",
-          "mapped-word": "1",
-          "output-token": "HOUSE",
-          "input-token-code": 0,
-          "output-token-code": 1
-        },
-        {
-          "pos": 1,
-          "input-word": "DEVONSHIRE",
-          "input-token": "WORD",
-          "mapped-word": "DEVONSHIRE",
-          "output-token": "STREET",
-          "input-token-code": 1,
-          "output-token-code": 5
-        },
-        :
-      ]
-    },
-        :
-      ]
-    }
-  ],
-  "stdaddr": {
-    "box": null,
-    "city": "BOSTON",
-    "name": "DEVONSHIRE",
-    "qual": null,
-    "unit": "# PENTHOUSE 301",
-    "extra": null,
-    "state": "MASSACHUSETTS",
-    "predir": null,
-    "sufdir": null,
-    "country": "USA",
-    "pretype": null,
-    "suftype": "PLACE",
-    "building": null,
-    "postcode": "02109",
-    "house_num": "1",
-    "ruralroute": null
-  },
-  "input_tokens": [
-    {
-      "pos": 0,
-      "word": "ONE",
-      "token": "NUMBER",
-      "stdword": "1",
-      "token-code": 0
-    },
-    {
-      "pos": 0,
-      "word": "ONE",
-      "token": "WORD",
-      "stdword": "1",
-      "token-code": 1
-    },
-	:
-  ]
+        <para>Variant 1: Single line address and returning the input tokens</para>
+        <programlisting><![CDATA[SELECT it->>'pos' AS position, it->>'word' AS word, it->>'stdword' AS standardized_word,
+            it->>'token' AS token, it->>'token-code' AS token_code
+    FROM jsonb(
+            debug_standardize_address('us_lex',
+                'us_gaz', 'us_rules', 'One Devonshire Place, PH 301, Boston, MA 02109')
+                 ) AS s, jsonb_array_elements(s->'input_tokens') AS it;]]></programlisting>
+            <screen>position |    word    | standardized_word | token  | token_code
+ 0        | ONE        | 1                 | NUMBER | 0
+ 0        | ONE        | 1                 | WORD   | 1
+ 1        | DEVONSHIRE | DEVONSHIRE        | WORD   | 1
+ 2        | PLACE      | PLACE             | TYPE   | 2
+ 3        | PH         | PATH              | TYPE   | 2
+ 3        | PH         | PENTHOUSE         | UNITT  | 17
+ 4        | 301        | 301               | NUMBER | 0
+(7 rows)</screen>
+        <para>Variant 2: Multi line address and returning first rule input mappings and score</para>
+        <programlisting><![CDATA[SELECT (s->'rules'->0->>'score')::numeric AS score, it->>'pos' AS position,
+        it->>'input-word' AS word, it->>'input-token' AS input_token, it->>'mapped-word' AS standardized_word,
+            it->>'output-token' AS output_token
+    FROM jsonb(
+            debug_standardize_address('us_lex',
+                'us_gaz', 'us_rules', 'One Devonshire Place, PH 301', 'Boston, MA 02109')
+                 ) AS s, jsonb_array_elements(s->'rules'->0->'rule_tokens') AS it;]]></programlisting>
+            <screen> score   | position |    word    | input_token | standardized_word | output_token
+ 0.876250 | 0        | ONE        | NUMBER      | 1                 | HOUSE
+ 0.876250 | 1        | DEVONSHIRE | WORD        | DEVONSHIRE        | STREET
+ 0.876250 | 2        | PLACE      | TYPE        | PLACE             | SUFTYP
+ 0.876250 | 3        | PH         | UNITT       | PENTHOUSE         | UNITT
+ 0.876250 | 4        | 301        | NUMBER      | 301               | UNITT
+(5 rows)
diff --git a/extensions/address_standardizer/address_standardizer.c b/extensions/address_standardizer/address_standardizer.c
index b4375dfdf..2a13d6406 100644
--- a/extensions/address_standardizer/address_standardizer.c
+++ b/extensions/address_standardizer/address_standardizer.c
@@ -63,15 +63,19 @@ debug_standardize_address(PG_FUNCTION_ARGS)
     char                *rultab;
     char                *micro;
     char                *macro;
-    //Datum                result;
     STDADDR             *stdaddr;
-    char               **values;
 	int k;
     char rule_in[100];
 	char rule_out[100];
 	char temp[10];
     int stz_no , n ;
-	//SEG *rseg;
+	DEF *__def__ ;
+    STZ **__stz_list__;
+	STZ_PARAM *__stz_info__ ;
+	int lex_pos;
+	int started;
+	STZ *__cur_stz__;
 	StringInfo	result  = makeStringInfo();
 	elog(DEBUG2, "Start %s", __func__);
@@ -90,8 +94,8 @@ debug_standardize_address(PG_FUNCTION_ARGS)
 	else {
 		ADDRESS             *paddr;
 		HHash               *stH;
-		stH = (HHash *) palloc0(sizeof(HHash));
 		int                  err;
+		stH = (HHash *) palloc0(sizeof(HHash));
 		if (!stH) {
 			elog(ERROR, "%s: Failed to allocate memory for hash!", __func__);
 			return -1;
@@ -151,21 +155,16 @@ debug_standardize_address(PG_FUNCTION_ARGS)
     if (!std)
         elog(ERROR, "%s failed to create the address standardizer object!",  __func__);
-	//output_rule_statistics( std->pagc_p->rules, std->err_p );
-	STAND_PARAM *ms = std->misc_stand;
 	elog(DEBUG2, "%s: calling std_standardize_mm('%s', '%s')", __func__ , micro, macro);
     stdaddr = std_standardize_mm( std, micro, macro, 0 );
-	elog(DEBUG2, "%s back from fetch_stdaddr",  __func__);
-	DEF *__def__ ;
-    STZ **__stz_list__;
-	STZ_PARAM *__stz_info__ = ms->stz_info ;
+	ms = std->misc_stand;
+	__stz_info__ = ms->stz_info ;
+	elog(DEBUG2, "%s back from fetch_stdaddr",  __func__);
-	int lex_pos;
 	elog(DEBUG2, "Input tokenization candidates:\n");
 	appendStringInfoString(result, "\"input_tokens\":[");
-	int started = 0;
+	started = 0;
 	for (lex_pos = FIRST_LEX_POS;lex_pos < ms->LexNum;lex_pos ++)
@@ -202,9 +201,7 @@ debug_standardize_address(PG_FUNCTION_ARGS)
 		strcpy(rule_in, "");
 		strcpy(rule_out, "");
-		STZ *__cur_stz__ = __stz_list__[stz_no] ;
-		KW *ruleref = __cur_stz__->build_key;
+		__cur_stz__ = __stz_list__[stz_no] ;
 		elog( DEBUG2, "Raw standardization %d with score %f:\n" , ( stz_no  ) , __cur_stz__->score ) ;
 		appendStringInfo(result, "{\"score\": %f,", __cur_stz__->score);
 		appendStringInfo(result, "\"raw_score\": %f,", __cur_stz__->raw_score);


Summary of changes:
 doc/extras_address_standardizer.xml                | 134 +++++++++------------
 .../address_standardizer/address_standardizer.c    |  29 ++---
 2 files changed, 71 insertions(+), 92 deletions(-)


