[postgis-tickets] [SCM] PostGIS branch master updated. 3.3.0rc2-1003-gd9016782e
git at osgeo.org
git at osgeo.org
Sun Jun 25 15:12:49 PDT 2023
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".
The branch, master has been updated
via d9016782e1de2344d9dde9fefccd98be49212e8d (commit)
from 9f61faf5c2a684f47141a4485fbfac07be4fb090 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit d9016782e1de2344d9dde9fefccd98be49212e8d
Author: Regina Obe <lr at pcorp.us>
Date: Sun Jun 25 15:53:02 2023 -0400
debug_standardize_address
- Add rule_string and rule_id, by looking up from rule table
- If no matching rule found (shouldn't happen but it does)
return -1 for rule_id
- Also return rule_stub_string which is the filter used to
search for the rule in rule table
- Add test to output rule_string/rule_stub_string
Closes #5397
diff --git a/extensions/address_standardizer/address_standardizer.c b/extensions/address_standardizer/address_standardizer.c
index a0a78f59f..050aa95a8 100644
--- a/extensions/address_standardizer/address_standardizer.c
+++ b/extensions/address_standardizer/address_standardizer.c
@@ -2,6 +2,8 @@
#include "fmgr.h"
#include "funcapi.h"
#include "catalog/pg_type.h"
+#include "utils/memutils.h"
+#include "executor/spi.h"
#include "utils/builtins.h"
#undef DEBUG
@@ -67,7 +69,7 @@ debug_standardize_address(PG_FUNCTION_ARGS)
int k;
char rule_in[100];
char rule_out[100];
- char temp[10];
+ char temp[100];
int stz_no , n ;
DEF *__def__ ;
STZ **__stz_list__;
@@ -76,6 +78,16 @@ debug_standardize_address(PG_FUNCTION_ARGS)
int lex_pos;
int started;
STZ *__cur_stz__;
+ /**start: variables for filtering rules **/
+ StringInfo sql;
+ SPIPlanPtr plan = NULL;
+ Datum datrul;
+ bool isnull;
+ Datum values[1];
+ Oid argtypes[1];
+ int spi_result;
+ int spi_conn_ret;
+ /**stop: variables for filtering rules **/
StringInfo result = makeStringInfo();
elog(DEBUG2, "Start %s", __func__);
@@ -166,6 +178,7 @@ debug_standardize_address(PG_FUNCTION_ARGS)
elog(DEBUG2, "Input tokenization candidates:\n");
appendStringInfoString(result, "\"input_tokens\":[");
started = 0;
+
for (lex_pos = FIRST_LEX_POS;lex_pos < ms->LexNum;lex_pos ++)
{
@@ -194,6 +207,27 @@ debug_standardize_address(PG_FUNCTION_ARGS)
started = 0;
appendStringInfoString(result, ", \"rules\":[");
+ sql = makeStringInfo();
+ appendStringInfo(sql, "SELECT id, rule FROM %s ", quote_identifier(rultab));
+ appendStringInfoString(sql, "WHERE rule LIKE $1::varchar");
+ argtypes[0] = CSTRINGOID;
+
+ spi_conn_ret = 1;
+ spi_conn_ret = SPI_connect();
+ if (spi_conn_ret != SPI_OK_CONNECT) {
+ elog(ERROR, "%s: Could not connect to the SPI manager", __func__);
+ return -1;
+ };
+ plan = SPI_prepare(sql->data, 1, argtypes);
+ //plan = SPI_prepare("SELECT id, rule FROM us_rules WHERE rule LIKE '1234%'", 0, NULL);
+ if ( ! plan )
+ {
+ elog(ERROR, "%s: unexpected return (%d) from query preparation: %s, (%d)",
+ __func__, SPI_result, sql->data, SPI_ERROR_UNCONNECTED);
+ return -1;
+ }
+ SPI_keepplan(plan);
+
for ( stz_no = 0 ; stz_no < n ; stz_no ++ )
{
if (stz_no > 0 ){
@@ -239,20 +273,66 @@ debug_standardize_address(PG_FUNCTION_ARGS)
if ( k2 == FAIL ) break ;
}
- appendStringInfo(result, "], \"rule_string\":\"%s",rule_in);
- appendStringInfoString(result, " -1 ");
- appendStringInfo(result, "%s",rule_out);
+ appendStringInfoString(result, "]");
+ /* get the sql for the matching rule records */
+ strcpy(temp, "");
+ strcat(temp, rule_in);
+ strcat(temp, " -1 ");
+ strcat(temp, rule_out);
+ strcat(temp, " -1 %");
+ /* execute */
+ values[0] = CStringGetDatum(temp);
+ spi_result = SPI_execute_plan(plan, values, NULL, true, 1);
+
+ //MemoryContextSwitchTo( oldcontext ); /* switch back */
+ if ( spi_result != SPI_OK_SELECT )
+ {
+ elog(ERROR, "%s: unexpected return (%d) from query execution: %s", __func__, spi_result, sql->data);
+ return -1;
+ }
+ else {
+
+ elog(DEBUG2, "%s: query success, sql: %s, parameter: %s", __func__, sql->data, temp);
+ }
+
+ appendStringInfo(result, ", \"rule_stub_string\": %s", quote_identifier(temp));
+
+ if (SPI_processed > 0 && SPI_tuptable != NULL) {
+ elog(DEBUG2, "%s: Processing results for: %s, rule: %s", __func__, sql->data, temp );
+ datrul = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull);
+ if ( isnull )
+ {
+ elog(NOTICE, "%s: No match %s", __func__, sql->data);
+ SPI_freetuptable(SPI_tuptable);
+ return -1;
+ }
+
+
+ appendStringInfo(result, ", \"rule_id\": %d",DatumGetInt32(datrul));
+ datrul = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull);
+ //char *srule = TextDatumGetCString(datrul);
+ appendStringInfo(result, ", \"rule_string\": %s", quote_identifier(TextDatumGetCString(datrul)));
+ //appendStringInfoString(result, " -1 ");
+ //appendStringInfo(result, "%s",rule_out);
+ SPI_freetuptable(SPI_tuptable);
+ }
+ else {
+ /** TODO: Figure out why this happens a lot **/
+ elog(DEBUG2, "%s: no match found for, sql: %s, parameter: %s", __func__, sql->data, temp);
+ appendStringInfoString(result, ", \"rule_id\": -1");
+ }
/**
- * TODO: Figure out how to add the type and weight rule type
+ * TODO: Add type and weight of rule (parsed from rulestring)
* **/
//appendStringInfo(result, " %d", ruleref->Type);
//elog(DEBUG2, "Rule type %d", ruleref->Type);
/** rule weight **/
//appendStringInfo(result, " %d", ruleref->Weight);
- appendStringInfoString(result, "\"}");
+ appendStringInfoString(result, "}");
}
+ SPI_finish();
appendStringInfoChar(result, ']');
elog(DEBUG2, "%s: setup values json", __func__);
appendStringInfoString(result, ",\"stdaddr\": {");
diff --git a/extensions/address_standardizer/expected/test-debug_standardize_address.out b/extensions/address_standardizer/expected/test-debug_standardize_address.out
index 31f312258..c4d62e004 100644
--- a/extensions/address_standardizer/expected/test-debug_standardize_address.out
+++ b/extensions/address_standardizer/expected/test-debug_standardize_address.out
@@ -18,10 +18,12 @@ SELECT '#5299a' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_ar
#5299a | 5 | 4 | 0.902500
(1 row)
-SELECT '#2978c' As ticket, , jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
-ERROR: syntax error at or near ","
-LINE 1: SELECT '#2978c' As ticket, , jsonb_array_length(d->'rules') ...
- ^
+SELECT '#2978c' As ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
+ ticket | num_rules | num_input_tokens | best_score
+--------+-----------+------------------+------------
+ #2978c | 6 | 5 | 0.751875
+(1 row)
+
SELECT '#5299b' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'score' AS best_score FROM jsonb(debug_standardize_address('us_lex', 'us_gaz', 'us_rules','50 Gold Piece Drive, Boston, MA, 02020')) AS d;
ticket | num_rules | num_input_tokens | best_score
--------+-----------+------------------+------------
@@ -37,3 +39,11 @@ WHERE it @> '{"pos": 2}'::jsonb;
#5299bt | 2 | PIECE | WORD | PIECE
(3 rows)
+SELECT r->>'score' AS score,r->>'rule_string' AS rule, r->>'rule_stub_string' AS rule_stub FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '25 Prince Street, NC 09985')) AS d, jsonb_array_elements(d->'rules') AS r;
+ score | rule | rule_stub
+----------+------------------------+---------------------
+ 0.902500 | 0 1 2 -1 1 5 6 -1 1 16 | 0 1 2 -1 1 5 6 -1 %
+ 0.791875 | 0 1 2 -1 1 5 6 -1 1 16 | 0 1 2 -1 1 5 6 -1 %
+ 0.661250 | | 0 1 2 -1 1 5 5 -1 %
+(3 rows)
+
diff --git a/extensions/address_standardizer/test-debug_standardize_address.sql.in b/extensions/address_standardizer/test-debug_standardize_address.sql.in
index fc8f7fcbb..8d3e77b5e 100644
--- a/extensions/address_standardizer/test-debug_standardize_address.sql.in
+++ b/extensions/address_standardizer/test-debug_standardize_address.sql.in
@@ -3,9 +3,11 @@ SELECT jsonb_array_length(jsonb(d)->'rules') FROM debug_standardize_address('us_
SELECT '#5299a' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex', 'us_gaz', 'us_rules','1 Timepiece Point','Boston, MA, 02220')) As d;
-SELECT '#2978c' As ticket, , jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
+SELECT '#2978c' As ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'raw_score' AS best_score FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '10-20 DORRANCE ST, PROVIDENCE, RI')) AS d;
SELECT '#5299b' AS ticket, jsonb_array_length(d->'rules') AS num_rules, jsonb_array_length(d->'input_tokens') AS num_input_tokens, d->'rules'->0->'score' AS best_score FROM jsonb(debug_standardize_address('us_lex', 'us_gaz', 'us_rules','50 Gold Piece Drive, Boston, MA, 02020')) AS d;
SELECT '#5299bt' AS ticket, it->>'pos' AS pos, it->>'word' AS word, it->>'token' AS token, it->>'stdword' AS stdword FROM jsonb(debug_standardize_address('us_lex', 'us_gaz', 'us_rules','50 Gold Piece Drive, Boston, MA, 02020')) AS d, jsonb_array_elements(d->'input_tokens') AS it
WHERE it @> '{"pos": 2}'::jsonb;
+
+SELECT r->>'score' AS score,r->>'rule_string' AS rule, r->>'rule_stub_string' AS rule_stub FROM jsonb(debug_standardize_address('us_lex','us_gaz','us_rules', '25 Prince Street, NC 09985')) AS d, jsonb_array_elements(d->'rules') AS r;
-----------------------------------------------------------------------
Summary of changes:
.../address_standardizer/address_standardizer.c | 92 ++++++++++++++++++++--
.../expected/test-debug_standardize_address.out | 18 ++++-
.../test-debug_standardize_address.sql.in | 4 +-
3 files changed, 103 insertions(+), 11 deletions(-)
hooks/post-receive
--
PostGIS
More information about the postgis-tickets
mailing list