[postgis-tickets] [SCM] PostGIS branch master updated. 3.2.0beta1-31-g6cf2fba94

git at osgeo.org git at osgeo.org
Thu Nov 25 08:53:16 PST 2021


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".

The branch, master, has been updated
       via  6cf2fba94a434e79d6526a6a5cfdb6057a91bff8 (commit)
      from  b24d7125a3b9de291bcc46d6d92721a66702f71c (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
commit 6cf2fba94a434e79d6526a6a5cfdb6057a91bff8
Author: Paul Ramsey <pramsey at cleverelephant.ca>
Date:   Thu Nov 25 08:52:53 2021 -0800

    Support PCRE2 where available and PCRE otherwise, closes #5019

diff --git a/configure.ac b/configure.ac
index 3532805b8..aa330ebf3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1190,7 +1190,9 @@ AC_ARG_WITH([address-standardizer],
                     [Disable the address_standardizer extension])],
     [], [])
 
-HAVE_PCRE=no
+HAVE_PCRE1=no
+HAVE_PCRE2=no
+PCRE_SUPPORT="not found"
 ADDRESS_STANDARDIZER=""
 
 if test "x$with_address_standardizer" != "xno"; then
@@ -1209,42 +1211,74 @@ if test "x$with_address_standardizer" != "xno"; then
             AC_MSG_ERROR([you must specify a parameter to --with-pcredir, e.g. --with-pcredir=/path/to])
         else
             if test -d "$PCREDIR"; then
-                AC_MSG_RESULT([Using user-specified PCRE directory: $PCREDIR])
-
-                AC_CHECK_FILE("$PCREDIR/include/pcre.h",
-                        [
-                            PCRE_CPPFLAGS="-I$PCREDIR/include"
-                            PCRE_LDFLAGS="-L$PCREDIR/lib -lpcre"
-                            HAVE_PCRE=yes
-                            ADDRESS_STANDARDIZER="address_standardizer"
-                        ],
-                        [AC_MSG_ERROR([Could not find header: pcre.h])])
+                AC_MSG_CHECKING([for PCRE in $PCREDIR...])
+
+                AC_CHECK_FILE("$PCREDIR/include/pcre2.h",
+                    [
+                        PCRE_CPPFLAGS="-I$PCREDIR/include"
+                        PCRE_LDFLAGS="-L$PCREDIR/lib -lpcre2-8"
+                        HAVE_PCRE2=yes
+                        ADDRESS_STANDARDIZER="address_standardizer"
+                        AC_MSG_RESULT([found pcre2.h])
+                    ],
+                    [
+                		AC_CHECK_FILE("$PCREDIR/include/pcre.h",
+                		[
+	                        PCRE_CPPFLAGS="-I$PCREDIR/include"
+	                        PCRE_LDFLAGS="-L$PCREDIR/lib -lpcre"
+	                        HAVE_PCRE1=yes
+	                        ADDRESS_STANDARDIZER="address_standardizer"
+                            AC_MSG_RESULT([found pcre.h])
+                		],
+                		[
+                            AC_MSG_RESULT([no])
+                			AC_MSG_ERROR(["Could not find PCRE header in $PCREDIR/include"])
+                		])
+                    ])
             else
                 AC_MSG_ERROR([the --with-pcredir directory "$PCREDIR" cannot be found])
             fi
         fi
     elif test ! -z "$PKG_CONFIG"; then
-	PKG_CHECK_MODULES([PCRE], [libpcre], [
-                PCRE_CPPFLAGS="$PCRE_CFLAGS"
-                PCRE_LDFLAGS="$PCRE_LIBS"
+        PKG_CHECK_MODULES([PCRE2], [libpcre2-8], [
+                PCRE_CPPFLAGS="$PCRE2_CFLAGS"
+                PCRE_LDFLAGS="$PCRE2_LIBS"
                 ADDRESS_STANDARDIZER="address_standardizer"
-                HAVE_PCRE=yes
+                HAVE_PCRE2=yes
             ],
             [
-                ADDRESS_STANDARDIZER=""
-                HAVE_PCRE=no
+                PKG_CHECK_MODULES([PCRE], [libpcre], [
+                    PCRE_CPPFLAGS="$PCRE_CFLAGS"
+                    PCRE_LDFLAGS="$PCRE_LIBS"
+                    ADDRESS_STANDARDIZER="address_standardizer"
+                    HAVE_PCRE1=yes
+                    ],[])
             ])
     fi
 
+    if test "x$HAVE_PCRE2" = "xyes"; then
+        PCRE_SUPPORT="Version 2"
+        PCRE_VERSION=2
+    elif test "x$HAVE_PCRE1" = "xyes"; then
+        PCRE_SUPPORT="Version 1"
+        PCRE_VERSION=1
+    else
+        ADDRESS_STANDARDIZER=""
+        PCRE_SUPPORT="not found"
+        PCRE_VERSION=0
+        AC_MSG_RESULT([ADDRESS_STANDARDIZER support: disabled])
+    fi
+
     AC_SUBST([PCRE_CPPFLAGS])
     AC_SUBST([PCRE_LDFLAGS])
+    AC_SUBST([PCRE_LDFLAGS])
+    AC_SUBST([PCRE_VERSION])
 
 else
     ADDRESS_STANDARDIZER=""
     AC_MSG_RESULT([ADDRESS_STANDARDIZER support: disabled])
 fi
 
-AC_SUBST([HAVE_PCRE])
 AC_SUBST([ADDRESS_STANDARDIZER])
 
 CPPFLAGS="$PGSQL_CPPFLAGS $GEOS_CPPFLAGS $PROJ_CPPFLAGS $PROTOBUF_CPPFLAGS $XML2_CPPFLAGS $SFCGAL_CPPFLAGS $JSON_CPPFLAGS $PCRE_CPPFLAGS $CPPFLAGS"
@@ -1730,7 +1764,7 @@ if test "x$HAVE_PROTOBUF" = "xyes"; then
   AC_MSG_RESULT([  protobuf-c version:   ${PROTOC_VERSION}])
 fi
 
-AC_MSG_RESULT([  PCRE support:         ${HAVE_PCRE}])
+AC_MSG_RESULT([  PCRE support:         ${PCRE_SUPPORT}])
 AC_MSG_RESULT([  Perl:                 ${PERL}])
 
 AC_MSG_RESULT()
diff --git a/extensions/address_standardizer/Makefile.in b/extensions/address_standardizer/Makefile.in
index 502648f31..3a3632c69 100644
--- a/extensions/address_standardizer/Makefile.in
+++ b/extensions/address_standardizer/Makefile.in
@@ -55,7 +55,7 @@ EXTRA_CLEAN = sql/
 
 
 DOCS = README.address_standardizer
-PG_CPPFLAGS = @CFLAGS@ @CPPFLAGS@
+PG_CPPFLAGS = @CFLAGS@ @CPPFLAGS@ -DPCRE_VERSION=@PCRE_VERSION@
 
 SHLIB_LINK = @SHLIB_LINK@ @PCRE_LDFLAGS@
 EXTRA_CLEAN = usps-st-city-name.txt mk-st-regexp mk-city-regex test_main
diff --git a/extensions/address_standardizer/address_parser.c b/extensions/address_standardizer/address_parser.c
index 892b42c45..3e1c642bf 100644
--- a/extensions/address_standardizer/address_parser.c
+++ b/extensions/address_standardizer/address_parser.c
@@ -8,7 +8,6 @@
 #include "utils/builtins.h"
 
 #include "parseaddress-api.h"
-#include <pcre.h>
 #include <string.h>
 
 #undef DEBUG
diff --git a/extensions/address_standardizer/expected/test-standardize_address_1.out b/extensions/address_standardizer/expected/test-standardize_address_1.out
index 34e825809..65e1767bf 100644
--- a/extensions/address_standardizer/expected/test-standardize_address_1.out
+++ b/extensions/address_standardizer/expected/test-standardize_address_1.out
@@ -10,7 +10,7 @@ select * from standardize_address('us_lex'::text, 'us_gaz'::text, 'us_rules'::te
           | 123       |        |      |         | MAIN | STREET  |        |            |       | KANSAS CITY | MISSOURI |         | 45678    |     | 
 (1 row)
 
-SELECT '#2981' As ticket, * FROM standardize_address('us_lex','us_gaz','us_rules', '1566 NEW STATE HWY, RAYNHAM, MA') ;
+SELECT '#2981' As ticket, * FROM standardize_address('us_lex','us_gaz','us_rules', '1566 NEW STATE HWY, RAYNHAM, MA');
  ticket | building | house_num | predir | qual | pretype |       name        | suftype | sufdir | ruralroute | extra |  city   |     state     | country | postcode | box | unit 
 --------+----------+-----------+--------+------+---------+-------------------+---------+--------+------------+-------+---------+---------------+---------+----------+-----+------
  #2981  |          | 1566      |        |      |         | NEW STATE HIGHWAY |         |        |            |       | RAYNHAM | MASSACHUSETTS | USA     |          |     | 
diff --git a/extensions/address_standardizer/parseaddress-api.c b/extensions/address_standardizer/parseaddress-api.c
index d5008c3f7..41efcf231 100644
--- a/extensions/address_standardizer/parseaddress-api.c
+++ b/extensions/address_standardizer/parseaddress-api.c
@@ -20,7 +20,16 @@
 #include <string.h>
 #include <ctype.h>
 #include <stdio.h>
-#include <pcre.h>
+
+#if PCRE_VERSION <= 1
+# include <pcre.h>
+# define PARSE_CASELESS PCRE_CASELESS
+#else
+# define PCRE2_CODE_UNIT_WIDTH 8
+# include <pcre2.h>
+# define PARSE_CASELESS PCRE2_CASELESS
+#endif
+
 #include "parseaddress-api.h"
 
 #undef DEBUG
@@ -87,6 +96,7 @@ void strtoupper(char *s)
         s[i] = toupper(s[i]);
 }
 
+#if PCRE_VERSION <= 1
 int match(char *pattern, char *s, int *ovect, int options)
 {
     const char *error;
@@ -101,10 +111,51 @@ int match(char *pattern, char *s, int *ovect, int options)
     free(re);
 
     if (rc < 0) return rc;
-    else if (rc == 0) rc = OVECCOUNT/3;
+    else if (rc == 0) rc = OVECPAIRS; // more matches than ovect can hold
 
     return rc;
 }
+#else
+int match(char *pattern, char *s, int *ovect, int options)
+{
+    int errorcode;
+    PCRE2_SIZE erroffset;
+    pcre2_code *re;
+    int rc;
+    pcre2_match_data *match_data;
+    PCRE2_SIZE *ovect2;
+    int i;
+
+    re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroffset, NULL);
+    if (!re) return -99;
+
+    match_data = pcre2_match_data_create(OVECPAIRS, NULL);
+
+    rc = pcre2_match(re, (PCRE2_SPTR8)s, strlen(s), 0, 0, match_data, NULL);
+
+    if (rc < 0) { // no match or error
+        pcre2_code_free(re);
+        pcre2_match_data_free(match_data);
+        return rc;
+    }
+
+    if (rc == 0) { // more matches than ovect can hold
+        rc = OVECPAIRS;
+    }
+
+    // copy the results out so we can free everything
+    // before returning
+    ovect2 = pcre2_get_ovector_pointer(match_data);
+    for (i = 0; i < rc; i++) {
+        ovect[2*i] = ovect2[2*i];
+        ovect[2*i + 1] = ovect2[2*i + 1];
+    }
+
+    pcre2_code_free(re);
+    pcre2_match_data_free(match_data);
+    return rc;
+}
+#endif
 
 #define RET_ERROR(a,e) if (!a) {*reterr = e; return NULL;}
 
@@ -180,7 +231,7 @@ ADDRESS *parseaddress(HHash *stH, char *s, int *reterr)
     }
     /* get canada zipcode components */
     else {
-        rc = match("\\b([a-z]\\d[a-z]\\s?\\d[a-z]\\d)$", s, ovect, PCRE_CASELESS);
+        rc = match("\\b([a-z]\\d[a-z]\\s?\\d[a-z]\\d)$", s, ovect, PARSE_CASELESS);
         if (rc >= 1) {
             ret->zip = (char *) palloc0((ovect[1]-ovect[0]+1) * sizeof(char));
             strncpy(ret->zip, s+ovect[0], ovect[1]-ovect[0]);
@@ -199,7 +250,7 @@ ADDRESS *parseaddress(HHash *stH, char *s, int *reterr)
     caregx = "^(?-xism:(?i:(?=[abmnopqsy])(?:n[ltsu]|[am]b|[bq]c|on|pe|sk|yt)))$";
     stregx = "\\b(?-xism:(?i:(?=[abcdfghiklmnopqrstuvwy])(?:a(?:l(?:a(?:bam|sk)a|berta)?|mer(?:ican)?\\ samoa|r(?:k(?:ansas)?|izona)?|[kszb])|s(?:a(?:moa|skatchewan)|outh\\ (?:carolin|dakot)a|\\ (?:carolin|dakot)a|[cdk])|c(?:a(?:lif(?:ornia)?)?|o(?:nn(?:ecticut)?|lorado)?|t)|d(?:e(?:la(?:ware)?)?|istrict\\ of\\ columbia|c)|f(?:l(?:(?:orid)?a)?|ederal\\ states\\ of\\ micronesia|m)|m(?:i(?:c(?:h(?:igan)?|ronesia)|nn(?:esota)?|ss(?:(?:issipp|our)i)?)?|a(?:r(?:shall(?:\\ is(?:l(?:and)?)?)?|yland)|ss(?:achusetts)?|ine|nitoba)?|o(?:nt(?:ana)?)?|[ehdnstpb])|g(?:u(?:am)?|(?:eorgi)?a)|h(?:awai)?i|i(?:d(?:aho)?|l(?:l(?:inois)?)?|n(?:d(?:iana)?)?|(?:ow)?a)|k(?:(?:ansa)?s|(?:entuck)?y)|l(?:a(?:bordor)?|ouisiana)|n(?:e(?:w(?:\\ (?:foundland(?:\\ and\\ labordor)?|hampshire|jersey|mexico|(?:yor|brunswic)k)|foundland)|(?:brask|vad)a)?|o(?:rth(?:\\ (?:mariana(?:\\ is(?:l(?:and)?)?)?|(?:carolin|dakot)a)|west\\ territor(?:ies|y))|va\\ scotia)|\\ (?:carolin|dakot)a|u(?:navut)?|[vhjmycdblsf]|w?t)|o(?:h(
 ?:io)?|k(?:lahoma)?|r(?:egon)?|n(?:t(?:ario)?)?)|p(?:a(?:lau)?|e(?:nn(?:sylvania)?|i)?|r(?:ince\\ edward\\ island)?|w|uerto\\ rico)|r(?:hode\\ island|i)|t(?:e(?:nn(?:essee)?|xas)|[nx])|ut(?:ah)?|v(?:i(?:rgin(?:\\ islands|ia))?|(?:ermon)?t|a)|w(?:a(?:sh(?:ington)?)?|i(?:sc(?:onsin)?)?|y(?:oming)?|(?:est)?\\ virginia|v)|b(?:ritish\\ columbia|c)|q(?:uebe)?c|y(?:ukon|t))))$";
 
-    rc = match(stregx, s, ovect, PCRE_CASELESS);
+    rc = match(stregx, s, ovect, PARSE_CASELESS);
     if (rc > 0) {
         state = (char *) palloc0((ovect[1]-ovect[0]+1) * sizeof(char));
         strncpy(state, s+ovect[0], ovect[1]-ovect[0]);
@@ -229,7 +280,7 @@ ADDRESS *parseaddress(HHash *stH, char *s, int *reterr)
         }
 
         /* check if it a Canadian Province */
-        rc = match(caregx, ret->st, ovect, PCRE_CASELESS);
+        rc = match(caregx, ret->st, ovect, PARSE_CASELESS);
         if (rc > 0) {
             strcpy(ret->cc, "CA");
             // if (ret->cc) printf("  CC: %s\n", ret->cc);
diff --git a/extensions/address_standardizer/parseaddress-api.h b/extensions/address_standardizer/parseaddress-api.h
index a3773a431..2037ef6da 100644
--- a/extensions/address_standardizer/parseaddress-api.h
+++ b/extensions/address_standardizer/parseaddress-api.h
@@ -20,7 +20,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 
 #include "postgres.h"
 
-#define OVECCOUNT 30
+#define OVECPAIRS 10
+#define OVECCOUNT 3*OVECPAIRS
 
 #ifdef USE_HSEARCH
 

-----------------------------------------------------------------------

Summary of changes:
 configure.ac                                       | 72 ++++++++++++++++------
 extensions/address_standardizer/Makefile.in        |  2 +-
 extensions/address_standardizer/address_parser.c   |  1 -
 .../expected/test-standardize_address_1.out        |  2 +-
 extensions/address_standardizer/parseaddress-api.c | 61 ++++++++++++++++--
 extensions/address_standardizer/parseaddress-api.h |  3 +-
 6 files changed, 113 insertions(+), 28 deletions(-)


hooks/post-receive
-- 
PostGIS


More information about the postgis-tickets mailing list