[postgis-users] edits to postgis_restore.pl

James Marca jmarca at translab.its.uci.edu
Wed Dec 15 13:00:28 PST 2004


At approximately Wed, Dec 15, 2004 at 10:37:04AM +0100, strk at refractions.net wrote:
...
> 
> The postgis_restore.pl 'strips' postgis objects out of a dump.

in looking at the postgis_restore.pl included in the 0.9.1 release, I
noticed a typo.  Then I added use strict; etc. and got a little
carried away.

I was intending to make some changes to address the new database
issue, as well as the database user question which isn't addressed
(presumes -U postgres, no password, etc), but haven't gotten that far.

please review/test the attached patchfile.  I don't have nearly enough
test cases.  In fact, all I have left is 0.9.0 and 0.9.1 PostGIS dumps.

James

-------------- next part --------------
--- postgis-0.9.1/utils/postgis_restore.pl.orig	2004-08-20 07:52:10.000000000 -0700
+++ postgis-0.9.1/utils/postgis_restore.pl	2004-12-15 12:42:21.000000000 -0800
@@ -37,20 +37,78 @@
 #	pg_dump-800/pg800 => pg_restore-800/pg800
 #
 
-eval "exec perl $0 $@"
+eval "exec perl -w $0 $@"
 	if (0);
+use strict;
+use warnings;
+use Carp;
+
+sub strip_whitespace_and_public {
+    #print "ARG1: [$arg]\n";
+    my $arg = lc(shift);
+    foreach ($arg){
+	s/^\s+//;
+	s/\s+$//;
+	s/^public\.//;
+	s/^opaque$/internal/;
+	s/^int[48]?$/integer/;
+	s/^float4$/real/;
+	s/^float8$/double precision/;
+	s/^varchar$/character varying/
+    }
+    return $arg;
+}
+
+
+my %obsolete_funcnames = (
+    'plpgsql_call_handler' => 1,
+    'unite_finalfunc'=>1,
+    'postgisgistcostestimate'=>1,
+    'wkb_in'=>1,
+    'wkb_out'=>1,
+    'ggeometry_consistent'=>1,
+    'ggeometry_compress'=>1,
+    'ggeometry_picksplit'=>1,
+    'gbox_picksplit'=>1,
+    'ggeometry_union'=>1,
+    'gbox_union'=>1,
+    'ggeometry_same'=>1,
+    'gbox_same'=>1,
+    'rtree_decompress'=>1,
+    'ggeometry_penalty'=>1,
+    'gbox_penalty'=>1,
+    'geometry_size'=>1, 
+);
+
+my %obsolete_funcids = (
+    'geometry_union(geometry, geometry)'=>1,
+    'geometry_inter(geometry, geometry)'=>1,
+    'create_histogram2d(box3d, integer)'=>1,
+    'estimate_histogram2d(histogram2d, box)'=>1,
+);
+
+my %obsolete_aggregate_names = (
+    'fastunion'=>1,
+    'mem_collect'=>1,
+);
+
+my %obsolete_types = (
+    'wkb'=>1,
+);
+
+
 
 (@ARGV == 3) || die "Usage: postgis_restore.pl <postgis.sql> <db> <dump>\nRestore a custom dump (pg_dump -Fc) of a postgis enabled database.\n";
 
-$DEBUG=1;
+my $DEBUG=1;
 
-my %aggs = {};
+my %aggs = ();
 my %fncasts = ();
 my %casts = ();
-my %funcs = {};
-my %types = {};
-my %opclass = {};
-my %ops = {};
+my %funcs = ();
+my %types = ();
+my %opclass = ();
+my %ops = ();
 
 my $postgissql = $ARGV[0];
 my $dbname = $ARGV[1];
@@ -58,7 +116,7 @@ my $dump = $ARGV[2];
 my $dumplist=$dump.".list";
 my $dumpascii=$dump.".ascii";
 
-print "postgis.sql is $postgsisql\n";
+print "postgis.sql is $postgissql\n";
 print "dbname is $dbname\n";
 print "dumpfile is $dump\n";
 
@@ -67,77 +125,50 @@ print "dumpfile is $dump\n";
 #
 print "Scanning $postgissql\n"; 
 open( INPUT, $postgissql ) || die "Couldn't open file: $postgissql\n";
-while( my $line = <INPUT>)
+ POSTGISSQL:
+    while( my $line = <INPUT>)
 {
 	$line =~ s/[\r\n]//g;
 	#print "LINE: $line\n";
 
-	next if $line =~ /^ *--/;
+	next POSTGISSQL if $line =~ /^\s*--/;
 
 	if ($line =~ /^ *create (or replace)? function ([^ ]*) *\((.*)\)/i)
 	{
 		my $name = lc($2);
 		my @args = split(",", $3);
 		my $geomfound = 0;
-		for (my $i=0; $i<@args; $i++)
+		foreach my $arg (@args)
 		{
-			$arg = lc($args[$i]);
-			#print "ARG1: [$arg]\n";
-			$arg =~ s/^ *//;
-			$arg =~ s/ *$//;
-			#print "ARG2: [$arg]\n";
-			if ( $arg =~ /^int[48]?$/ ) {
-				$args[$i] = 'integer';
-				next;
-			}
-			if ( $arg eq 'float4' ) {
-				$args[$i] = 'real';
-				next;
-			}
-			if ( $arg eq 'float8' ) {
-				$args[$i] = 'double precision';
-				next;
-			}
-			if ( $arg eq 'varchar' ) {
-				$args[$i] = 'character varying';
-				next;
-			}
-			if ( $arg eq 'opaque' ) {
-				$args[$i] = 'internal';
-				next;
-			}
-			$args[$i] = $arg;
-			$geomfound++ if ( $arg eq 'oldgeometry' );
+			$arg = strip_whitespace_and_public($arg);
+			$geomfound++ if ( $arg =~ /oldgeometry/ );
 		}
 		my $id = $name."(".join(", ", @args).")";
 		$funcs{$id} = 1;
 		print "SQLFUNC: $id\n" if $DEBUG;
 		if ( $geomfound )
 		{
-			for (my $i=0; $i<@args; $i++)
-			{
-				$arg = $args[$i];
-				$arg = 'geometry' if ($arg eq 'oldgeometry');
-				$args[$i] = $arg;
-			}
-			my $id = $name."(".join(", ", @args).")";
-			$funcs{$id} = 1;
-			print "SQLFUNC: $id\n" if $DEBUG;
+		    foreach  (@args)
+		    {
+			s/^oldgeometry$/geometry/ ;
+		    }
+		    my $id = $name."(".join(", ", @args).")";
+		    $funcs{$id} = 1;
+		    print "SQLFUNC: $id\n" if $DEBUG;
 		}
-		next;
+		next POSTGISSQL;
 	}
 	if ($line =~ /^create type +([^ ]+)/i)
 	{
 		my $type = $1;
 		$types{$type} = 1;
 		print "SQLTYPE $type\n" if $DEBUG;
-		if ( $type eq 'oldgeometry' )
+		if ( $type =~ s/^oldgeometry$/geometry/ )
 		{
-			$type = 'geometry';
 			$types{$type} = 1;
 			print "SQLTYPE $type\n" if $DEBUG;
 		}
-		next;
+		next POSTGISSQL;
 	}
 	if ($line =~ /^create aggregate *([^ ]*) *\(/i)
 	{
@@ -152,23 +183,22 @@ while( my $line = <INPUT>)
 			}
 			last if $subline =~ /;[\t ]*$/;
 		}
-		if ( $type eq undef )
+		unless ( defined $type )
 		{
-			print "Could not find base type for aggregate $name\n";
-			print "($line)\n";
-			exit 1;
+			warn "Could not find base type for aggregate $name";
+			warn "($line)";
+			confess;
 		}
 		my $id = $name.'('.$type.')';
 		print "SQLAGG $id\n" if $DEBUG;
 		$aggs{$id} = 1;
-		if ( $type eq 'oldgeometry' )
+		if ( $type =~ s/^oldgeometry$/geometry/ )
 		{
-			$type = 'geometry';
 			my $id = $name.'('.$type.')';
 			$aggs{$id} = 1;
 			print "SQLAGG $id\n" if $DEBUG;
 		}
-		next;
+		next POSTGISSQL;
 	}
 
 	# CAST
@@ -190,20 +220,17 @@ while( my $line = <INPUT>)
 			print "SQLFNCAST $id\n" if $DEBUG;
 		}
 
-		my $id = $from.','.$to;
+		$id = $from.','.$to;
 		$casts{$id} = 1;
 		print "SQLCAST $id\n" if $DEBUG;
-		if ( $from eq 'oldgeometry' || $to eq 'oldgeometry' )
+		if(map {  s/^oldgeometry$/geometry/ } ($from,$to) )
 		{
-			$from = 'geometry' if $from eq 'geometry';
-			$to = 'geometry' if $to eq 'geometry';
-			my $id = $from.','.$to;
-			$casts{$id} = 1;
-			print "SQLCAST $id\n" if $DEBUG;
+		    my $id = $from.','.$to;
+		    $casts{$id} = 1;
+		    print "SQLCAST $id\n" if $DEBUG;
 		}
 
-
-		next;
+		next POSTGISSQL;
 	}
 
 	# OPERATOR CLASS
@@ -212,7 +239,7 @@ while( my $line = <INPUT>)
 		my $id = lc($1);
 		print "SQLOPCLASS $id\n" if $DEBUG;
 		$opclass{$id} = 1;
-		next;
+		next POSTGISSQL;
 	}
 
 	# OPERATOR 
@@ -233,199 +260,121 @@ while( my $line = <INPUT>)
 				$rarg=lc($1);
 			}
 		}
-		my $id = $name.','.$larg.','.$rarg;
+		my $id = join (',',$name,$larg,$rarg);
 		print "SQLOP $id\n" if $DEBUG;
 		$ops{$id} = 1;
-		if ( $larg eq 'oldgeometry' || $rarg eq 'oldgeometry' )
+		if(map {  s/^oldgeometry$/geometry/ } ($larg,$rarg) )
 		{
-			$larg = 'geometry' if $larg eq 'oldgeometry';
-			$rarg = 'geometry' if $rarg eq 'oldgeometry';
-			my $id = $name.','.$larg.','.$rarg;
-			print "SQLOP $id\n" if $DEBUG;
-			$ops{$id} = 1;
+		    my $id = join (',',$name,$larg,$rarg);
+		    # my $id = $name.','.$larg.','.$rarg;
+		    print "SQLOP $id\n" if $DEBUG;
+		    $ops{$id} = 1;
 		}
-		next;
+		next POSTGISSQL;
 	}
 }
 close( INPUT );
 #exit;
 
 
+
 #
 # Scan dump list
 #
 print "Scanning $dump list\n"; 
 open( OUTPUT, ">$dumplist") || die "Can't write to ".$dump.".list\n";
 open( INPUT, "pg_restore -l $dump |") || die "Couldn't run pg_restore -l $dump\n";
+DUMPLIST:
 while( my $line = <INPUT> )
 {
-	next if $line =~ /^;/;
-	next if $line =~ /^ *--/;
+	next DUMPLIST if $line =~ /^;/;
+	next DUMPLIST if $line =~ /^\s*--/;
 
-	if ($line =~ / FUNCTION *([^ ]*) *\(([^)]*)\)/)
+	if ($line =~ /FUNCTION\s*([^ ]*)\s*\(([^)]*)\)/)
 	{
 		my $funcname = $1;
+		# shortcircuit $funcnames on the naughty list
+		if (defined $obsolete_funcnames{$funcname}){
+		    print "SKIPPING FUNCTION:  [$funcname($2)] --- name on the obsolete name list"  if $DEBUG;
+		    next DUMPLIST ;
+		}
 		#print "FUNCNAME: [$funcname]\n";
 		my @args = split(",", $2);
 		#print "ARGS: [". at args."]\n";
 		my $wkbinvolved = 0;
-		for (my $i=0; $i<@args; $i++)
+		foreach my $arg (@args)
 		{
-			$arg = lc($args[$i]);
-			$arg =~ s/^ *//;
-			$arg =~ s/ *$//;
-			$arg =~ s/^public.//;
-			if ( $arg eq 'opaque' ) {
-				$args[$i] = 'internal';
-				next;
-			}
-			$args[$i] = $arg;
-			$wkbinvolved++ if ( $arg eq 'wkb' );
+		    $arg = strip_whitespace_and_public($arg);
+		    $wkbinvolved++ if ( $arg =~ /wkb/ );
 		}
-
-		$args = join(', ', @args);
+		my $args = join(', ', @args);
 		#print "ARGS SCALAR: [$args]\n";
 		my $id = $funcname."(".$args.")";
 		#print "ID: [$id]\n";
-
 		# WKB type is obsoleted
 		if ( $wkbinvolved )
 		{
-			print "SKIPPING FUNC $id\n" if $DEBUG;
-			next;
-		}
-
-		if ( $funcname eq 'plpgsql_call_handler' )
-		{
-			print "SKIPPING FUNC $id\n" if $DEBUG;
-			next;
-		}
-		# This is an old postgis function which might
-		# still be in a dump
-		if ( $funcname eq 'unite_finalfunc' )
-		{
-			print "SKIPPING FUNC $id\n" if $DEBUG;
-			next;
-		}
-
-		# This are old postgis functions which might
-		# still be in a dump
-		if ( $funcname eq 'postgisgistcostestimate' )
-		{
-			print "SKIPPING FUNC $id\n" if $DEBUG;
-			next;
-		}
-		if ( $funcname eq 'wkb_in' || $funcname eq 'wkb_out' )
-		{
-			print "SKIPPING FUNC $id\n" if $DEBUG;
-			next;
+			print "SKIPPING FUNCTION:  [$id] --- wkb type is obsoleted\n" if $DEBUG;
+			next DUMPLIST;
 		}
-		if ( $funcname eq 'ggeometry_consistent' ||
-			$funcname eq 'ggeometry_compress' ||
-			$funcname eq 'ggeometry_picksplit' ||
-			$funcname eq 'gbox_picksplit' ||
-			$funcname eq 'ggeometry_union' ||
-			$funcname eq 'gbox_union' ||
-			$funcname eq 'ggeometry_same' ||
-			$funcname eq 'gbox_same' ||
-			$funcname eq 'rtree_decompress' ||
-			$funcname eq 'ggeometry_penalty' ||
-			$funcname eq 'gbox_penalty' ||
-			$id eq 'geometry_union(geometry, geometry)' ||
-			$id eq 'geometry_inter(geometry, geometry)' ||
-			$funcname eq 'geometry_size' )
-		{
-			print "SKIPPING FUNC $id\n" if $DEBUG;
-			next;
-		}
-
-		if ( $id eq 'create_histogram2d(box3d, integer)' ||
-			$id eq 'estimate_histogram2d(histogram2d, box)' )
-		{
-			print "SKIPPING FUNC $id\n" if $DEBUG;
-			next;
-		}
-
-		if ( $funcs{$id} )
-		{
-			print "SKIPPING PGIS FUNC $id\n" if $DEBUG;
-			next;
+		if (defined $obsolete_funcnames{$funcname}){
+		    print "SKIPPING FUNCTION:  [$id] --- id on the obsolete id list"  if $DEBUG;
+		    next DUMPLIST ;
 		}
 		print "KEEPING FUNCTION: [$id]\n" if $DEBUG;
-		#next;
 	}
 	elsif ($line =~ / AGGREGATE (.*)\((.*)\)/)
 	{
 		my $name = $1;
 		my @args = split(",", $2);
-		for (my $i=0; $i<@args; $i++)
+		foreach my $arg (@args)
 		{
-			$arg = lc($args[$i]);
-			$arg =~ s/^ *//;
-			$arg =~ s/ *$//;
-			$arg =~ s/^public.//;
-			if ( $arg eq 'opaque' ) {
-				$args[$i] = 'internal';
-				next;
-			}
-			$args[$i] = $arg;
+		    $arg = strip_whitespace_and_public($arg);
 		}
-		$args = join(', ', @args);
+		my $args = join(', ', @args);
 		my $id = $name."(".$args.")";
 		if ( $aggs{$id} )
 		{
-			print "SKIPPING PGIS AGG $id\n" if $DEBUG;
-			next;
-		}
-		# This is an old postgis aggregate
-		if ( $name eq 'fastunion' )
-		{
-			print "SKIPPING old PGIS AGG $id\n" if $DEBUG;
-			next;
+			print "SKIPPING PGIS AGG $id --- already in\n" if $DEBUG;
+			next DUMPLIST;
 		}
-
 		# This is an old postgis aggregate
-		if ( $name eq 'mem_collect' )
+		if ( defined $obsolete_aggregate_names{$name}  )
 		{
-			print "SKIPPING old PGIS AGG $id\n" if $DEBUG;
-			next;
+			print "SKIPPING old PGIS AGG $id --- name is on obsolete aggregate list\n" if $DEBUG;
+			next DUMPLIST;
 		}
-
 		print "KEEPING AGGREGATE [$id]\n" if $DEBUG;
-		#next;
 	}
 	elsif ($line =~ / TYPE (.*) .*/)
 	{
 		my $type = lc($1);
-		if ( $type eq 'wkb' )
+		if ( $types{$type} )
 		{
-			print "SKIPPING PGIS TYPE $type\n" if $DEBUG;
-			next;
+			print "SKIPPING PGIS TYPE $type --- already included \n" if $DEBUG;
+			next DUMPLIST;
 		}
-		if ( $types{$type} )
+		if (  defined $obsolete_types{$type} )
 		{
-			print "SKIPPING PGIS TYPE $type\n" if $DEBUG;
-			next;
+			print "SKIPPING PGIS TYPE $type --- type is obsolete \n" if $DEBUG;
+			next DUMPLIST;
 		}
 		print "KEEPING TYPE [$type]\n" if $DEBUG;
-		#next;
 	}
 	elsif ($line =~ / PROCEDURAL LANGUAGE plpgsql/)
 	{
 		print "SKIPPING PROCLANG plpgsql\n" if $DEBUG;
-		next;
+		next DUMPLIST;
 	}
 
 	# spatial_ref_sys and geometry_columns
 	elsif ($line =~ / TABLE geometry_columns/)
 	{
 		#print "SKIPPING geometry_columns schema\n" if $DEBUG;
-		#next;
 	}
 	elsif ($line =~ / TABLE spatial_ref_sys/)
 	{
 		#print "SKIPPING spatial_ref_sys schema\n" if $DEBUG;
-		#next;
 	}
 
 	elsif ($line =~ / OPERATOR CLASS *([^ ]*)/)
@@ -434,9 +383,10 @@ while( my $line = <INPUT> )
 
 		if ( $opclass{$id} )
 		{
-			print "SKIPPING PGIS OPCLASS $id\n" if $DEBUG;
-			next;
+			print "SKIPPING PGIS OPCLASS $id --- already included \n" if $DEBUG;
+			next DUMPLIST;
 		}
+		# no obsoletes?
 		print "KEEPING OPCLASS [$id]\n" if $DEBUG;
 	}
 
@@ -450,18 +400,27 @@ while( my $line = <INPUT> )
 		my $id = $arg1."(".$arg2.")";
 		if ( $fncasts{$id} )
 		{
-			print "SKIPPING PGIS FNCAST $id\n" if $DEBUG;
-			next;
+			print "SKIPPING PGIS FNCAST $id ---  already included\n" if $DEBUG;
+			next DUMPLIST;
 		}
 		#if ($arg1 eq 'box3d' || $arg2 eq 'geometry')
 		#{
 			#print "SKIPPING PGIS FNCAST $id\n" if $DEBUG;
-			#next;
+			#next DUMPLIST;
 		#}
-		if ($arg1 eq 'wkb' || $arg2 eq 'wkb')
+		if ( my @badtypes = grep {defined $obsolete_types{$_}} ($arg1,$arg2))
 		{
-			print "SKIPPING PGIS FNCAST $id\n" if $DEBUG;
-			next;
+		    if ($DEBUG){
+			print "SKIPPING PGIS FNCAST $id --- type ";
+			print join ("and type ", @badtypes);
+			if(@badtypes>1){
+			    print " are ";
+			} else {
+			    print " is ";
+			} 
+			print "obsoleted \n";
+		    }
+		    next DUMPLIST;
 		}
 		print "KEEPING FNCAST $id (see CAST)\n" if $DEBUG;
 	}
@@ -476,19 +435,33 @@ while( my $line = <INPUT> )
 		my $id = $arg1.",".$arg2;
 		if ( $casts{$id} )
 		{
-			print "SKIPPING PGIS CAST $id\n" if $DEBUG;
-			next;
+			print "SKIPPING PGIS CAST $id --- already included\n" if $DEBUG;
+			next DUMPLIST;
 		}
 		#if ($arg1 eq 'box3d' || $arg2 eq 'geometry')
 		#{
 			#print "SKIPPING PGIS CAST $id\n" if $DEBUG;
-			#next;
+			#next DUMPLIST;
 		#}
-		if ($arg1 eq 'wkb' || $arg2 eq 'wkb')
+		if ( my @badtypes = grep {defined $obsolete_types{$_}} ($arg1,$arg2))
 		{
-			print "SKIPPING PGIS CAST $id\n" if $DEBUG;
-			next;
-		}
+		    if ($DEBUG){
+			print "SKIPPING PGIS CAST $id --- type ";
+			print join ("and type ", @badtypes);
+			if(@badtypes>1){
+			    print " are ";
+			} else {
+			    print " is ";
+			} 
+			print "obsoleted \n";
+		    }
+		    next DUMPLIST;
+		}
+# 		if ($arg1 eq 'wkb' || $arg2 eq 'wkb')
+# 		{
+# 			print "SKIPPING PGIS CAST $id\n" if $DEBUG;
+# 			next DUMPLIST;
+# 		}
 		print "KEEPING CAST $id\n" if $DEBUG;
 	}
 	print OUTPUT $line;
@@ -500,9 +473,10 @@ close(OUTPUT);
 print "Producing ascii dump $dumpascii\n"; 
 open( INPUT, "pg_restore -L $dumplist $dump |") || die "Can't run pg_restore\n";
 open( OUTPUT, ">$dumpascii") || die "Can't write to $dumpascii\n";
-while( my $line = <INPUT> )
+ ASCIIDUMP:
+    while( my $line = <INPUT> )
 {
-	next if $line =~ /^ *--/;
+	next ASCIIDUMP if $line =~ /^ *--/;
 
 	if ( $line =~ /^SET search_path/ )
 	{
@@ -539,11 +513,11 @@ while( my $line = <INPUT> )
 		if ( $ops{$id} )
 		{
 			print "SKIPPING PGIS OP $id\n" if $DEBUG;
-			next;
+			next ASCIIDUMP;
 		}
 		print "KEEPING OP $id\n" if $DEBUG;
 		print OUTPUT @sublines;
-		next;
+		next ASCIIDUMP;
 	}
 
 	print OUTPUT $line;


More information about the postgis-users mailing list