[postgis-devel] [PostGIS] #38: shp2pgsql: Handling numeric field containing scientific notation

PostGIS trac at osgeo.org
Mon Jun 22 17:09:39 PDT 2009


#38: shp2pgsql: Handling numeric field containing scientific notation
-----------------------+----------------------------------------------------
  Reporter:  hustvedt  |       Owner:               
      Type:  defect    |      Status:  new          
  Priority:  medium    |   Milestone:  postgis 1.4.1
 Component:  postgis   |     Version:               
Resolution:            |    Keywords:               
-----------------------+----------------------------------------------------
Changes (by pramsey):

  * milestone:  => postgis 1.4.1

Old description:

> The numeric casting hack that I have in there will definitely not work in
> the dump mode of output, so maybe shp2pgsql will have to convert the
> numeric fields to an integer?
>
> There is what appears to be a slight issue with the .dbf part of a
> shapefile generated by ArcEditor 9.0 or 9.1 that has a value in an
> integer
> field of 10 billion or more if that shapefile is then used with
> shp2pgsql.
> Attempting to load the resulting sql file results in a casting error. One
> possible solution is to cast the field to numeric in the sql output,
> before
> pgsql attempts to store it in a int8 field.
>

> If a shapefile has an integer column with a value of 10 billion, some
> versions of ERSI ArcEditor will store that into the .dbf as '1.00e+010'.
> As
> far as I can tell, that is an invalid string to store in a dBase III
> file,
> since the only characters allowed in a Numeric field are {+-.0123456789}.
>
> shp2pgsql doesn't do any coversions on what is stored in the .dbf file,
> so
> that gets put directly into the .sql file that is generated. It does set
> the types of the fields in the sql file, of course, so that affects the
> castings that pgsql is going to do. '1.00e+010' is a valid numeric type,
> and that can be casted to an int8, with any necessary rounding done. It
> is
> not a valid int8 value, even if it will fit into an int8 field. shp2pgsql
> deduces correctly that an int8 will be needed to store the resulting
> number, but I think that is only based on the field length from the
> header
> of the .dbf file.
>

> One solution is that for any field that has a type of an integer, to add
> an
> explicit cast to numeric in the sql file, then psql will be able to
> handle
> this case. This is a simple solution, since it is just adding a
> '::numeric'
> after any integer field value. Patching shp2pgsql and adding a
> 'printf('::numeric);' is the easiest way to do that I can see.
>

> Here is what the current code will tell psql to do, and the resulting
> error:
> # select '1.00e+010'::integer;
> ERROR:  invalid input syntax for integer: '1.00e+010'
>

>
> However, if that value is cast to ::numeric first, then it will work as
> an
> int8:
> # select '1.00e+010'::numeric;
>   numeric  -------------
> 10000000000
> (1 row)
>
> # select '1.00e+010'::numeric::int8;
>    int8    -------------
> 10000000000
> (1 row)
>

> The command line we use is 'shp2pgsql -g the_geom -W UTF8 -s 4326 -c -N
> skip SAPEFILE TABLE > SQL'
> Running this sql code that shp2pgsql created generates the error,
> 'PGError:
> ERROR:  invalid input syntax for integer: '1.00e+010' ':
>

> CREATE TABLE 'data_staging'.'shp_121336819166268_20867' (gid serial
> PRIMARY
> KEY,
> 'flag' int2,
> 'v1' varchar(2),
> 'v2' varchar(1),
> 'v3' varchar(3),
> 'v4' varchar(3),
> 'v5' varchar(2),
> 'v6' varchar(5),
> 'v6a' varchar(16),  'v7' varchar(50),
> 'v7a' int4,
> 'v8' varchar(45),
> 'v9' varchar(5),
> 'v10' varchar(6),
> 'v11' int8,
> 'v12' varchar(32),
> 'v13' varchar(2),
> 'v17' varchar(50),
> 'v18' varchar(50),
> 'address' varchar(45),
> 'city' varchar(32),
> 'state' varchar(2),
> 'zipcode' varchar(5),
> 'zip_p4' varchar(4),
> 'v25' varchar(9),
> 'zip_p4_a' varchar(10),
> 'v33' varchar(10),
> 'v34' int2,
> 'v38' int2,
> 'v39' int2,
> 'v40' int2,
> 'v41' int2,
> 'v42' int2,
> 'v43' int2,
> 'v44' int2,
> 'v45' int2,
> 'v46' int2,
> 'v47' int2,
> 'v52' int2,
> 'v53' int2,
> 'v54' int2,
> 'v55' int2,
> 'v56' int2,
> 'v57' int2,
> 'v58' int2,
> 'v59' int2,
> 'v60' int2,
> 'v61' int2,
> 'v62' int2,
> 'v62f' int2,
> 'v63' int2,
> 'v63f' int2,
> 'v64' int2,
> 'v64f' int2,
> 'v65' int4,
> 'v65f' int2,
> 'v66' int4,
> 'v66f' int2,
> 'v67' int4,
> 'v67f' int2,
> 'v68' int4,
> 'v68f' int2,
> 'v69' int4,
> 'v69f' int2,
> 'v70' int4,
> 'v70f' int2,
> 'v71' int4,
> 'v71f' int2,
> 'v72' int4,
> 'v72f' int2,
> 'v73' int4,
> 'v73f' int2,
> 'v74' int4,
> 'v74f' int2,
> 'v75' int4,
> 'v75f' int2,
> 'v76' int4,
> 'v76f' int2,
> 'v77' int4,
> 'v77f' int2,
> 'v78' int4,
> 'v78f' int2,
> 'v79' int4,
> 'v79f' int2,
> 'v80' int4,
> 'v80f' int2,
> 'v81' int4,
> 'v81f' int2,
> 'v82' int8,
> 'v82f' int2,
> 'v83' int2,
> 'v84' int8,
> 'v84f' int2,
> 'v85' int4,
> 'cp_pct' float8,
> 'filter__' int2,
> 'sizecat' float8,
> 'joyce_st' float8,
> 'longitude' numeric,
> 'latitude' numeric,
> 'georesult' varchar(11),
> 'georstclp' varchar(2),
> 'fid_1' numeric,
> 'correction' varchar(50),
> 'long_adj' numeric,
> 'lati_adj' numeric);
> SELECT
> AddGeometryColumn('data_staging','shp_121336819166268_20867','the_geom','4326','POINT',2);
>
> INSERT INTO 'data_staging'.'shp_121336819166268_20867'
> ('flag','v1','v2','v3','v4','v5','v6','v6a','v7','v7a','v8','v9','v10','v11','v12','v13','v17','v18','address','city','state','zipcode','zip_p4','v25','zip_p4_a','v33','v34','v38','v39','v40','v41','v42','v43','v44','v45','v46','v47','v52','v53','v54','v55','v56','v57','v58','v59','v60','v61','v62','v62f','v63','v63f','v64','v64f','v65','v65f','v66','v66f','v67','v67f','v68','v68f','v69','v69f','v70','v70f','v71','v71f','v72','v72f','v73','v73f','v74','v74f','v75','v75f','v76','v76f','v77','v77f','v78','v78f','v79','v79f','v80','v80f','v81','v81f','v82','v82f','v83','v84','v84f','v85','cp_pct','filter__','sizecat','joyce_st','longitude','latitude','georesult','georstclp','fid_1','correction','long_adj','lati_adj',the_geom)
> VALUES ('0','IL','2','024','001','02','60100','1420240010260100','ALBION
> POLICE
> DEPT','88','ALBION','17047',NULL,'1933','EDWARDS','03',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'9999','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','3','0','3','0','3','999999','9','3','0','0','0','0','0','0','0','0','0','0','0','3','0','0','0','2','3','0','3','0','3','88888','8','88888','8','88888','8','88888','8','88888','8','1.00e+010','9','0','1.00e+010','9','0','0.00','1','1.00','0.00','-88.056282000','38.377029000','N','N','1856.000000',NULL,'-8.80562820000e+001','38.377029000','SRID=4326;01010000009F56D11F9A0356C061527C7C42304340');

New description:

 The numeric casting hack that I have in there will definitely not work in
 the dump mode of output, so maybe shp2pgsql will have to convert the
 numeric fields to an integer?

 There is what appears to be a slight issue with the .dbf part of a
 shapefile generated by ArcEditor 9.0 or 9.1 that has a value in an integer
 field of 10 billion or more if that shapefile is then used with shp2pgsql.
 Attempting to load the resulting sql file results in a casting error. One
 possible solution is to cast the field to numeric in the sql output,
 before
 pgsql attempts to store it in a int8 field.


 If a shapefile has an integer column with a value of 10 billion, some
 versions of ERSI ArcEditor will store that into the .dbf as '1.00e+010'.
 As
 far as I can tell, that is an invalid string to store in a dBase III file,
 since the only characters allowed in a Numeric field are {+-.0123456789}.

 shp2pgsql doesn't do any coversions on what is stored in the .dbf file, so
 that gets put directly into the .sql file that is generated. It does set
 the types of the fields in the sql file, of course, so that affects the
 castings that pgsql is going to do. '1.00e+010' is a valid numeric type,
 and that can be casted to an int8, with any necessary rounding done. It is
 not a valid int8 value, even if it will fit into an int8 field. shp2pgsql
 deduces correctly that an int8 will be needed to store the resulting
 number, but I think that is only based on the field length from the header
 of the .dbf file.


 One solution is that for any field that has a type of an integer, to add
 an
 explicit cast to numeric in the sql file, then psql will be able to handle
 this case. This is a simple solution, since it is just adding a
 '::numeric'
 after any integer field value. Patching shp2pgsql and adding a
 'printf('::numeric);' is the easiest way to do that I can see.


 Here is what the current code will tell psql to do, and the resulting
 error:
 # select '1.00e+010'::integer;
 ERROR:  invalid input syntax for integer: '1.00e+010'



 However, if that value is cast to ::numeric first, then it will work as an
 int8:
 # select '1.00e+010'::numeric;
   numeric  -------------
 10000000000
 (1 row)

 # select '1.00e+010'::numeric::int8;
    int8    -------------
 10000000000
 (1 row)


 The command line we use is 'shp2pgsql -g the_geom -W UTF8 -s 4326 -c -N
 skip SAPEFILE TABLE > SQL'
 Running this sql code that shp2pgsql created generates the error,
 'PGError:
 ERROR:  invalid input syntax for integer: '1.00e+010' ':


 CREATE TABLE 'data_staging'.'shp_121336819166268_20867' (gid serial
 PRIMARY
 KEY,
 'flag' int2,
 'v1' varchar(2),
 'v2' varchar(1),
 'v3' varchar(3),
 'v4' varchar(3),
 'v5' varchar(2),
 'v6' varchar(5),
 'v6a' varchar(16),  'v7' varchar(50),
 'v7a' int4,
 'v8' varchar(45),
 'v9' varchar(5),
 'v10' varchar(6),
 'v11' int8,
 'v12' varchar(32),
 'v13' varchar(2),
 'v17' varchar(50),
 'v18' varchar(50),
 'address' varchar(45),
 'city' varchar(32),
 'state' varchar(2),
 'zipcode' varchar(5),
 'zip_p4' varchar(4),
 'v25' varchar(9),
 'zip_p4_a' varchar(10),
 'v33' varchar(10),
 'v34' int2,
 'v38' int2,
 'v39' int2,
 'v40' int2,
 'v41' int2,
 'v42' int2,
 'v43' int2,
 'v44' int2,
 'v45' int2,
 'v46' int2,
 'v47' int2,
 'v52' int2,
 'v53' int2,
 'v54' int2,
 'v55' int2,
 'v56' int2,
 'v57' int2,
 'v58' int2,
 'v59' int2,
 'v60' int2,
 'v61' int2,
 'v62' int2,
 'v62f' int2,
 'v63' int2,
 'v63f' int2,
 'v64' int2,
 'v64f' int2,
 'v65' int4,
 'v65f' int2,
 'v66' int4,
 'v66f' int2,
 'v67' int4,
 'v67f' int2,
 'v68' int4,
 'v68f' int2,
 'v69' int4,
 'v69f' int2,
 'v70' int4,
 'v70f' int2,
 'v71' int4,
 'v71f' int2,
 'v72' int4,
 'v72f' int2,
 'v73' int4,
 'v73f' int2,
 'v74' int4,
 'v74f' int2,
 'v75' int4,
 'v75f' int2,
 'v76' int4,
 'v76f' int2,
 'v77' int4,
 'v77f' int2,
 'v78' int4,
 'v78f' int2,
 'v79' int4,
 'v79f' int2,
 'v80' int4,
 'v80f' int2,
 'v81' int4,
 'v81f' int2,
 'v82' int8,
 'v82f' int2,
 'v83' int2,
 'v84' int8,
 'v84f' int2,
 'v85' int4,
 'cp_pct' float8,
 'filter__' int2,
 'sizecat' float8,
 'joyce_st' float8,
 'longitude' numeric,
 'latitude' numeric,
 'georesult' varchar(11),
 'georstclp' varchar(2),
 'fid_1' numeric,
 'correction' varchar(50),
 'long_adj' numeric,
 'lati_adj' numeric);
 SELECT
 AddGeometryColumn('data_staging','shp_121336819166268_20867','the_geom','4326','POINT',2);

 INSERT INTO 'data_staging'.'shp_121336819166268_20867'
 ('flag','v1','v2','v3','v4','v5','v6','v6a','v7','v7a','v8','v9','v10','v11','v12','v13','v17','v18','address','city','state','zipcode','zip_p4','v25','zip_p4_a','v33','v34','v38','v39','v40','v41','v42','v43','v44','v45','v46','v47','v52','v53','v54','v55','v56','v57','v58','v59','v60','v61','v62','v62f','v63','v63f','v64','v64f','v65','v65f','v66','v66f','v67','v67f','v68','v68f','v69','v69f','v70','v70f','v71','v71f','v72','v72f','v73','v73f','v74','v74f','v75','v75f','v76','v76f','v77','v77f','v78','v78f','v79','v79f','v80','v80f','v81','v81f','v82','v82f','v83','v84','v84f','v85','cp_pct','filter__','sizecat','joyce_st','longitude','latitude','georesult','georstclp','fid_1','correction','long_adj','lati_adj',the_geom)
 VALUES ('0','IL','2','024','001','02','60100','1420240010260100','ALBION
 POLICE
 DEPT','88','ALBION','17047',NULL,'1933','EDWARDS','03',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'9999','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','3','0','3','0','3','999999','9','3','0','0','0','0','0','0','0','0','0','0','0','3','0','0','0','2','3','0','3','0','3','88888','8','88888','8','88888','8','88888','8','88888','8','1.00e+010','9','0','1.00e+010','9','0','0.00','1','1.00','0.00','-88.056282000','38.377029000','N','N','1856.000000',NULL,'-8.80562820000e+001','38.377029000','SRID=4326;01010000009F56D11F9A0356C061527C7C42304340');

-- 
Ticket URL: <http://trac.osgeo.org/postgis/ticket/38#comment:1>
PostGIS <http://trac.osgeo.org/postgis/>
PostGIS


More information about the postgis-devel mailing list