[postgis-tickets] [SCM] PostGIS branch master updated. 3.3.0rc2-1003-gd9016782e

git at osgeo.org git at osgeo.org
Sun Jun 25 15:12:49 PDT 2023


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".

The branch, master has been updated
       via  d9016782e1de2344d9dde9fefccd98be49212e8d (commit)
      from  9f61faf5c2a684f47141a4485fbfac07be4fb090 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit d9016782e1de2344d9dde9fefccd98be49212e8d
Author: Regina Obe <lr at pcorp.us>
Date:   Sun Jun 25 15:53:02 2023 -0400

    debug_standardize_address
    
     - Add rule_string and rule_id, by looking up from rule table
     - If no matching rule found (shouldn't happen but it does)
       return -1 for rule_id
     - Also return rule_stub_string which is the filter used to
       search for the rule in rule table
     - Add test to output rule_string/rule_stub_string
    
    Closes #5397

diff --git a/extensions/address_standardizer/address_standardizer.c b/extensions/address_standardizer/address_standardizer.c
index a0a78f59f..050aa95a8 100644
--- a/extensions/address_standardizer/address_standardizer.c
+++ b/extensions/address_standardizer/address_standardizer.c
@@ -2,6 +2,8 @@
 #include "fmgr.h"
 #include "funcapi.h"
 #include "catalog/pg_type.h"
+#include "utils/memutils.h"
+#include "executor/spi.h"
 #include "utils/builtins.h"
 
 #undef DEBUG
@@ -67,7 +69,7 @@ debug_standardize_address(PG_FUNCTION_ARGS)
 	int k;
     char rule_in[100];
 	char rule_out[100];
-	char temp[10];
+	char temp[100];
     int stz_no , n ;
 	DEF *__def__ ;
     STZ **__stz_list__;
@@ -76,6 +78,16 @@ debug_standardize_address(PG_FUNCTION_ARGS)
 	int lex_pos;
 	int started;
 	STZ *__cur_stz__;
+	/**start: variables for filtering rules **/
+	StringInfo	sql;
+	SPIPlanPtr plan = NULL;
+	Datum datrul;
+	bool isnull;
+	Datum values[1];
+  	Oid argtypes[1];
+	int spi_result;
+	int spi_conn_ret;
+	/**stop: variables for filtering rules **/
 	StringInfo	result  = makeStringInfo();
 
 	elog(DEBUG2, "Start %s", __func__);
@@ -166,6 +178,7 @@ debug_standardize_address(PG_FUNCTION_ARGS)
 	elog(DEBUG2, "Input tokenization candidates:\n");
 	appendStringInfoString(result, "\"input_tokens\":[");
 	started = 0;
+
 	for (lex_pos = FIRST_LEX_POS;lex_pos < ms->LexNum;lex_pos ++)
 	{
 
@@ -194,6 +207,27 @@ debug_standardize_address(PG_FUNCTION_ARGS)
 	started = 0;
 
 	appendStringInfoString(result, ", \"rules\":[");
+	sql = makeStringInfo();
+	appendStringInfo(sql, "SELECT id, rule FROM %s ", quote_identifier(rultab));
+	appendStringInfoString(sql, "WHERE rule LIKE $1::varchar");
+	argtypes[0] = CSTRINGOID;
+
+	spi_conn_ret = 1;
+	spi_conn_ret = SPI_connect();
+	if (spi_conn_ret != SPI_OK_CONNECT) {
+		elog(ERROR, "%s: Could not connect to the SPI manager", __func__);
+		return -1;
+	};
+	plan = SPI_prepare(sql->data, 1, argtypes);
+	//plan = SPI_prepare("SELECT id, rule FROM us_rules WHERE rule LIKE '1234%'", 0, NULL);
+	if ( ! plan )
+    {
+      elog(ERROR, "%s: unexpected return (%d) from query preparation: %s, (%d)",
+              __func__, SPI_result, sql->data, SPI_ERROR_UNCONNECTED);
+      return -1;
+    }
+	SPI_keepplan(plan);
+
 	for ( stz_no = 0 ; stz_no < n ; stz_no ++ )
 	{
 		if  (stz_no > 0 ){
@@ -239,20 +273,66 @@ debug_standardize_address(PG_FUNCTION_ARGS)
 			if ( k2 == FAIL ) break ;
 		}
 
-		appendStringInfo(result, "], \"rule_string\":\"%s",rule_in);
-		appendStringInfoString(result, " -1 ");
-		appendStringInfo(result, "%s",rule_out);
+		appendStringInfoString(result, "]");
+		/* get the sql for the matching rule records */
+		strcpy(temp, "");
+		strcat(temp, rule_in);
+		strcat(temp, " -1 ");
+		strcat(temp, rule_out);
+		strcat(temp, " -1 %");
+		 /* execute */
+ 		values[0] = CStringGetDatum(temp);
+  		spi_result = SPI_execute_plan(plan, values, NULL, true, 1);
+
+		//MemoryContextSwitchTo( oldcontext ); /* switch back */
+		if ( spi_result != SPI_OK_SELECT )
+		{
+			elog(ERROR, "%s: unexpected return (%d) from query execution: %s", __func__, spi_result, sql->data);
+			return -1;
+		}
+		else {
+
+			elog(DEBUG2, "%s: query success, sql: %s, parameter: %s", __func__, sql->data, temp);
+		}
+
+		appendStringInfo(result, ", \"rule_stub_string\": %s", quote_identifier(temp));
+
+		if (SPI_processed > 0 && SPI_tuptable != NULL) {
+			elog(DEBUG2, "%s: Processing results for: %s, rule: %s", __func__, sql->data, temp );
+			datrul = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+			if ( isnull )
+			{
+				elog(NOTICE, "%s: No match %s",  __func__, sql->data);
+				SPI_freetuptable(SPI_tuptable);
+				return -1;
+			}
+
+
+			appendStringInfo(result, ", \"rule_id\": %d",DatumGetInt32(datrul));
+			datrul = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
+			//char *srule = TextDatumGetCString(datrul);
+			appendStringInfo(result, ", \"rule_string\": %s", quote_identifier(TextDatumGetCString(datrul)));
+			//appendStringInfoString(result, " -1 ");
+			//appendStringInfo(result, "%s",rule_out);
+			SPI_freetuptable(SPI_tuptable);
+		}
+		else {
+			/** TODO: Figure out why this happens a lot **/
+			elog(DEBUG2, "%s: no match found for, sql: %s, parameter: %s", __func__, sql->data, temp);
+			appendStringInfoString(result, ", \"rule_id\": -1");
+		}
 
 		/**
-		 * TODO:  Figure out how to add the type and weight rule type
+		 * TODO:  Add type and weight of rule (parsed from rulestring)
 		 * **/
 		//appendStringInfo(result, " %d", ruleref->Type);
 		//elog(DEBUG2, "Rule  type  %d",  ruleref->Type);
 		/** rule weight **/
 		//appendStringInfo(result, " %d", ruleref->Weight);
 
-		appendStringInfoString(result, "\"}");
+		appendStringInfoString(result, "}");
 	}
+	SPI_finish();
 	appendStringInfoChar(result, ']');
 	elog(DEBUG2, "%s: setup values json", __func__);
 	appendStringInfoString(result, ",\"stdaddr\": {");
diff --git a/extensions/address_standardizer/expected/test-debug_standardize_address.out b/extensions/address_standardizer/expected/test-debug_standardize_address.out
index 31f312258..c4d62e004 100644
--- a/extensions/address_standardizer/expected/test-debug_standardize_address.out
+++ b/extensions/address_standardizer/expected/test-debug_standardize_address.out
@@ -18,10 +18,12 @@ SELECT '#5299a' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_ar
  #5299a |         5 |                4 | 0.902500
 (1 row)
 
-SELECT '#2978c' As ticket, , jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens,  d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
-ERROR:  syntax error at or near ","
-LINE 1: SELECT '#2978c' As ticket, , jsonb_array_length(d->'rules') ...
-                                   ^
+SELECT '#2978c' As ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens,  d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
+ ticket | num_rules | num_input_tokens | best_score
+--------+-----------+------------------+------------
+ #2978c |         6 |                5 | 0.751875
+(1 row)
+
 SELECT '#5299b' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'score' AS best_score FROM jsonb(debug_standardize_address('us_lex',  'us_gaz', 'us_rules','50 Gold Piece Drive, Boston, MA, 02020')) AS d;
  ticket | num_rules | num_input_tokens | best_score
 --------+-----------+------------------+------------
@@ -37,3 +39,11 @@ WHERE it @> '{"pos": 2}'::jsonb;
  #5299bt | 2   | PIECE | WORD  | PIECE
 (3 rows)
 
+SELECT r->>'score' AS score,r->>'rule_string' AS rule, r->>'rule_stub_string' AS rule_stub  FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '25 Prince Street, NC 09985')) AS d, jsonb_array_elements(d->'rules') AS r;
+  score   |          rule          |      rule_stub
+----------+------------------------+---------------------
+ 0.902500 | 0 1 2 -1 1 5 6 -1 1 16 | 0 1 2 -1 1 5 6 -1 %
+ 0.791875 | 0 1 2 -1 1 5 6 -1 1 16 | 0 1 2 -1 1 5 6 -1 %
+ 0.661250 |                        | 0 1 2 -1 1 5 5 -1 %
+(3 rows)
+
diff --git a/extensions/address_standardizer/test-debug_standardize_address.sql.in b/extensions/address_standardizer/test-debug_standardize_address.sql.in
index fc8f7fcbb..8d3e77b5e 100644
--- a/extensions/address_standardizer/test-debug_standardize_address.sql.in
+++ b/extensions/address_standardizer/test-debug_standardize_address.sql.in
@@ -3,9 +3,11 @@ SELECT jsonb_array_length(jsonb(d)->'rules') FROM debug_standardize_address('us_
 
 SELECT '#5299a' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens,  d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex',  'us_gaz', 'us_rules','1 Timepiece Point','Boston, MA, 02220')) As d;
 
-SELECT '#2978c' As ticket, , jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens,  d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
+SELECT '#2978c' As ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens,  d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
 
 SELECT '#5299b' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'score' AS best_score FROM jsonb(debug_standardize_address('us_lex',  'us_gaz', 'us_rules','50 Gold Piece Drive, Boston, MA, 02020')) AS d;
 
 SELECT '#5299bt' AS ticket, it->>'pos' AS pos, it->>'word' AS word,  it->>'token' AS token, it->>'stdword' AS stdword FROM jsonb(debug_standardize_address('us_lex',  'us_gaz', 'us_rules','50 Gold Piece Drive, Boston, MA, 02020')) AS d, jsonb_array_elements(d->'input_tokens') AS it
 WHERE it @> '{"pos": 2}'::jsonb;
+
+SELECT r->>'score' AS score,r->>'rule_string' AS rule, r->>'rule_stub_string' AS rule_stub  FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '25 Prince Street, NC 09985')) AS d, jsonb_array_elements(d->'rules') AS r;

-----------------------------------------------------------------------

Summary of changes:
 .../address_standardizer/address_standardizer.c    | 92 ++++++++++++++++++++--
 .../expected/test-debug_standardize_address.out    | 18 ++++-
 .../test-debug_standardize_address.sql.in          |  4 +-
 3 files changed, 103 insertions(+), 11 deletions(-)


hooks/post-receive
-- 
PostGIS


More information about the postgis-tickets mailing list