[postgis-users] edits to  postgis_restore.pl
    strk at refractions.net 
    strk at refractions.net
       
    Thu Dec 16 01:06:43 PST 2004
    
    
  
On Wed, Dec 15, 2004 at 01:00:28PM -0800, James Marca wrote:
> At approximately Wed, Dec 15, 2004 at 10:37:04AM +0100, strk at refractions.net wrote:
> ...
> > 
> > The postgis_restore.pl 'strips' postgis objects out of a dump.
> 
> in looking at the postgis_restore.pl included in the 0.9.1 release, I
> noticed a typo.  Then I added use strict; etc. and got a little
> carried away.
> 
> I was intending to make some changes to address the new database
> issue, as well as the database user question which isn't addressed
> (presumes -U postgres, no password, etc), but haven't gotten that far.
> 
> please review/test the attached patchfile.  I don't have nearly enough
> test cases.  In fact, all I have left is 0.9.0 and 0.9.1 PostGIS dumps.
> 
> James
Thank you James, I'll take a look at it.
--strk;
> 
> --- postgis-0.9.1/utils/postgis_restore.pl.orig	2004-08-20 07:52:10.000000000 -0700
> +++ postgis-0.9.1/utils/postgis_restore.pl	2004-12-15 12:42:21.000000000 -0800
> @@ -37,20 +37,78 @@
>  #	pg_dump-800/pg800 => pg_restore-800/pg800
>  #
>  
> -eval "exec perl $0 $@"
> +eval "exec perl -w $0 $@"
>  	if (0);
> +use strict;
> +use warnings;
> +use Carp;
> +
> +sub strip_whitespace_and_public {
> +    #print "ARG1: [$arg]\n";
> +    my $arg = lc(shift);
> +    foreach ($arg){
> +	s/^\s+//;
> +	s/\s+$//;
> +	s/^public\.//;
> +	s/^opaque$/internal/;
> +	s/^int[48]?$/integer/;
> +	s/^float4$/real/;
> +	s/^float8$/double precision/;
> +	s/^varchar$/character varying/
> +    }
> +    return $arg;
> +}
> +
> +
> +my %obsolete_funcnames = (
> +    'plpgsql_call_handler' => 1,
> +    'unite_finalfunc'=>1,
> +    'postgisgistcostestimate'=>1,
> +    'wkb_in'=>1,
> +    'wkb_out'=>1,
> +    'ggeometry_consistent'=>1,
> +    'ggeometry_compress'=>1,
> +    'ggeometry_picksplit'=>1,
> +    'gbox_picksplit'=>1,
> +    'ggeometry_union'=>1,
> +    'gbox_union'=>1,
> +    'ggeometry_same'=>1,
> +    'gbox_same'=>1,
> +    'rtree_decompress'=>1,
> +    'ggeometry_penalty'=>1,
> +    'gbox_penalty'=>1,
> +    'geometry_size'=>1, 
> +);
> +
> +my %obsolete_funcids = (
> +    'geometry_union(geometry, geometry)'=>1,
> +    'geometry_inter(geometry, geometry)'=>1,
> +    'create_histogram2d(box3d, integer)'=>1,
> +    'estimate_histogram2d(histogram2d, box)'=>1,
> +);
> +
> +my %obsolete_aggregate_names = (
> +    'fastunion'=>1,
> +    'mem_collect'=>1,
> +);
> +
> +my %obsolete_types = (
> +    'wkb'=>1,
> +);
> +
> +
>  
>  (@ARGV == 3) || die "Usage: postgis_restore.pl <postgis.sql> <db> <dump>\nRestore a custom dump (pg_dump -Fc) of a postgis enabled database.\n";
>  
> -$DEBUG=1;
> +my $DEBUG=1;
>  
> -my %aggs = {};
> +my %aggs = ();
>  my %fncasts = ();
>  my %casts = ();
> -my %funcs = {};
> -my %types = {};
> -my %opclass = {};
> -my %ops = {};
> +my %funcs = ();
> +my %types = ();
> +my %opclass = ();
> +my %ops = ();
>  
>  my $postgissql = $ARGV[0];
>  my $dbname = $ARGV[1];
> @@ -58,7 +116,7 @@ my $dump = $ARGV[2];
>  my $dumplist=$dump.".list";
>  my $dumpascii=$dump.".ascii";
>  
> -print "postgis.sql is $postgsisql\n";
> +print "postgis.sql is $postgissql\n";
>  print "dbname is $dbname\n";
>  print "dumpfile is $dump\n";
>  
> @@ -67,77 +125,50 @@ print "dumpfile is $dump\n";
>  #
>  print "Scanning $postgissql\n"; 
>  open( INPUT, $postgissql ) || die "Couldn't open file: $postgissql\n";
> -while( my $line = <INPUT>)
> + POSTGISSQL:
> +    while( my $line = <INPUT>)
>  {
>  	$line =~ s/[\r\n]//g;
>  	#print "LINE: $line\n";
>  
> -	next if $line =~ /^ *--/;
> +	next POSTGISSQL if $line =~ /^\s*--/;
>  
>  	if ($line =~ /^ *create (or replace)? function ([^ ]*) *\((.*)\)/i)
>  	{
>  		my $name = lc($2);
>  		my @args = split(",", $3);
>  		my $geomfound = 0;
> -		for (my $i=0; $i<@args; $i++)
> +		foreach my $arg (@args)
>  		{
> -			$arg = lc($args[$i]);
> -			#print "ARG1: [$arg]\n";
> -			$arg =~ s/^ *//;
> -			$arg =~ s/ *$//;
> -			#print "ARG2: [$arg]\n";
> -			if ( $arg =~ /^int[48]?$/ ) {
> -				$args[$i] = 'integer';
> -				next;
> -			}
> -			if ( $arg eq 'float4' ) {
> -				$args[$i] = 'real';
> -				next;
> -			}
> -			if ( $arg eq 'float8' ) {
> -				$args[$i] = 'double precision';
> -				next;
> -			}
> -			if ( $arg eq 'varchar' ) {
> -				$args[$i] = 'character varying';
> -				next;
> -			}
> -			if ( $arg eq 'opaque' ) {
> -				$args[$i] = 'internal';
> -				next;
> -			}
> -			$args[$i] = $arg;
> -			$geomfound++ if ( $arg eq 'oldgeometry' );
> +			$arg = strip_whitespace_and_public($arg);
> +			$geomfound++ if ( $arg =~ /oldgeometry/ );
>  		}
>  		my $id = $name."(".join(", ", @args).")";
>  		$funcs{$id} = 1;
>  		print "SQLFUNC: $id\n" if $DEBUG;
>  		if ( $geomfound )
>  		{
> -			for (my $i=0; $i<@args; $i++)
> -			{
> -				$arg = $args[$i];
> -				$arg = 'geometry' if ($arg eq 'oldgeometry');
> -				$args[$i] = $arg;
> -			}
> -			my $id = $name."(".join(", ", @args).")";
> -			$funcs{$id} = 1;
> -			print "SQLFUNC: $id\n" if $DEBUG;
> +		    foreach  (@args)
> +		    {
> +			s/^oldgeometry$/geometry/ ;
> +		    }
> +		    my $id = $name."(".join(", ", @args).")";
> +		    $funcs{$id} = 1;
> +		    print "SQLFUNC: $id\n" if $DEBUG;
>  		}
> -		next;
> +		next POSTGISSQL;
>  	}
>  	if ($line =~ /^create type +([^ ]+)/i)
>  	{
>  		my $type = $1;
>  		$types{$type} = 1;
>  		print "SQLTYPE $type\n" if $DEBUG;
> -		if ( $type eq 'oldgeometry' )
> +		if ( $type =~ s/^oldgeometry$/geometry/ )
>  		{
> -			$type = 'geometry';
>  			$types{$type} = 1;
>  			print "SQLTYPE $type\n" if $DEBUG;
>  		}
> -		next;
> +		next POSTGISSQL;
>  	}
>  	if ($line =~ /^create aggregate *([^ ]*) *\(/i)
>  	{
> @@ -152,23 +183,22 @@ while( my $line = <INPUT>)
>  			}
>  			last if $subline =~ /;[\t ]*$/;
>  		}
> -		if ( $type eq undef )
> +		unless ( defined $type )
>  		{
> -			print "Could not find base type for aggregate $name\n";
> -			print "($line)\n";
> -			exit 1;
> +			warn "Could not find base type for aggregate $name";
> +			warn "($line)";
> +			confess;
>  		}
>  		my $id = $name.'('.$type.')';
>  		print "SQLAGG $id\n" if $DEBUG;
>  		$aggs{$id} = 1;
> -		if ( $type eq 'oldgeometry' )
> +		if ( $type =~ s/^oldgeometry$/geometry/ )
>  		{
> -			$type = 'geometry';
>  			my $id = $name.'('.$type.')';
>  			$aggs{$id} = 1;
>  			print "SQLAGG $id\n" if $DEBUG;
>  		}
> -		next;
> +		next POSTGISSQL;
>  	}
>  
>  	# CAST
> @@ -190,20 +220,17 @@ while( my $line = <INPUT>)
>  			print "SQLFNCAST $id\n" if $DEBUG;
>  		}
>  
> -		my $id = $from.','.$to;
> +		$id = $from.','.$to;
>  		$casts{$id} = 1;
>  		print "SQLCAST $id\n" if $DEBUG;
> -		if ( $from eq 'oldgeometry' || $to eq 'oldgeometry' )
> +		if(map {  s/^oldgeometry$/geometry/ } ($from,$to) )
>  		{
> -			$from = 'geometry' if $from eq 'geometry';
> -			$to = 'geometry' if $to eq 'geometry';
> -			my $id = $from.','.$to;
> -			$casts{$id} = 1;
> -			print "SQLCAST $id\n" if $DEBUG;
> +		    my $id = $from.','.$to;
> +		    $casts{$id} = 1;
> +		    print "SQLCAST $id\n" if $DEBUG;
>  		}
>  
> -
> -		next;
> +		next POSTGISSQL;
>  	}
>  
>  	# OPERATOR CLASS
> @@ -212,7 +239,7 @@ while( my $line = <INPUT>)
>  		my $id = lc($1);
>  		print "SQLOPCLASS $id\n" if $DEBUG;
>  		$opclass{$id} = 1;
> -		next;
> +		next POSTGISSQL;
>  	}
>  
>  	# OPERATOR 
> @@ -233,199 +260,121 @@ while( my $line = <INPUT>)
>  				$rarg=lc($1);
>  			}
>  		}
> -		my $id = $name.','.$larg.','.$rarg;
> +		my $id = join (',',$name,$larg,$rarg);
>  		print "SQLOP $id\n" if $DEBUG;
>  		$ops{$id} = 1;
> -		if ( $larg eq 'oldgeometry' || $rarg eq 'oldgeometry' )
> +		if(map {  s/^oldgeometry$/geometry/ } ($larg,$rarg) )
>  		{
> -			$larg = 'geometry' if $larg eq 'oldgeometry';
> -			$rarg = 'geometry' if $rarg eq 'oldgeometry';
> -			my $id = $name.','.$larg.','.$rarg;
> -			print "SQLOP $id\n" if $DEBUG;
> -			$ops{$id} = 1;
> +		    my $id = join (',',$name,$larg,$rarg);
> +		    # my $id = $name.','.$larg.','.$rarg;
> +		    print "SQLOP $id\n" if $DEBUG;
> +		    $ops{$id} = 1;
>  		}
> -		next;
> +		next POSTGISSQL;
>  	}
>  }
>  close( INPUT );
>  #exit;
>  
>  
> +
>  #
>  # Scan dump list
>  #
>  print "Scanning $dump list\n"; 
>  open( OUTPUT, ">$dumplist") || die "Can't write to ".$dump.".list\n";
>  open( INPUT, "pg_restore -l $dump |") || die "Couldn't run pg_restore -l $dump\n";
> +DUMPLIST:
>  while( my $line = <INPUT> )
>  {
> -	next if $line =~ /^;/;
> -	next if $line =~ /^ *--/;
> +	next DUMPLIST if $line =~ /^;/;
> +	next DUMPLIST if $line =~ /^\s*--/;
>  
> -	if ($line =~ / FUNCTION *([^ ]*) *\(([^)]*)\)/)
> +	if ($line =~ /FUNCTION\s*([^ ]*)\s*\(([^)]*)\)/)
>  	{
>  		my $funcname = $1;
> +		# shortcircuit $funcnames on the naughty list
> +		if (defined $obsolete_funcnames{$funcname}){
> +		    print "SKIPPING FUNCTION:  [$funcname($2)] --- name on the obsolete name list"  if $DEBUG;
> +		    next DUMPLIST ;
> +		}
>  		#print "FUNCNAME: [$funcname]\n";
>  		my @args = split(",", $2);
>  		#print "ARGS: [". at args."]\n";
>  		my $wkbinvolved = 0;
> -		for (my $i=0; $i<@args; $i++)
> +		foreach my $arg (@args)
>  		{
> -			$arg = lc($args[$i]);
> -			$arg =~ s/^ *//;
> -			$arg =~ s/ *$//;
> -			$arg =~ s/^public.//;
> -			if ( $arg eq 'opaque' ) {
> -				$args[$i] = 'internal';
> -				next;
> -			}
> -			$args[$i] = $arg;
> -			$wkbinvolved++ if ( $arg eq 'wkb' );
> +		    $arg = strip_whitespace_and_public($arg);
> +		    $wkbinvolved++ if ( $arg =~ /wkb/ );
>  		}
> -
> -		$args = join(', ', @args);
> +		my $args = join(', ', @args);
>  		#print "ARGS SCALAR: [$args]\n";
>  		my $id = $funcname."(".$args.")";
>  		#print "ID: [$id]\n";
> -
>  		# WKB type is obsoleted
>  		if ( $wkbinvolved )
>  		{
> -			print "SKIPPING FUNC $id\n" if $DEBUG;
> -			next;
> -		}
> -
> -		if ( $funcname eq 'plpgsql_call_handler' )
> -		{
> -			print "SKIPPING FUNC $id\n" if $DEBUG;
> -			next;
> -		}
> -		# This is an old postgis function which might
> -		# still be in a dump
> -		if ( $funcname eq 'unite_finalfunc' )
> -		{
> -			print "SKIPPING FUNC $id\n" if $DEBUG;
> -			next;
> -		}
> -
> -		# This are old postgis functions which might
> -		# still be in a dump
> -		if ( $funcname eq 'postgisgistcostestimate' )
> -		{
> -			print "SKIPPING FUNC $id\n" if $DEBUG;
> -			next;
> -		}
> -		if ( $funcname eq 'wkb_in' || $funcname eq 'wkb_out' )
> -		{
> -			print "SKIPPING FUNC $id\n" if $DEBUG;
> -			next;
> +			print "SKIPPING FUNCTION:  [$id] --- wkb type is obsoleted\n" if $DEBUG;
> +			next DUMPLIST;
>  		}
> -		if ( $funcname eq 'ggeometry_consistent' ||
> -			$funcname eq 'ggeometry_compress' ||
> -			$funcname eq 'ggeometry_picksplit' ||
> -			$funcname eq 'gbox_picksplit' ||
> -			$funcname eq 'ggeometry_union' ||
> -			$funcname eq 'gbox_union' ||
> -			$funcname eq 'ggeometry_same' ||
> -			$funcname eq 'gbox_same' ||
> -			$funcname eq 'rtree_decompress' ||
> -			$funcname eq 'ggeometry_penalty' ||
> -			$funcname eq 'gbox_penalty' ||
> -			$id eq 'geometry_union(geometry, geometry)' ||
> -			$id eq 'geometry_inter(geometry, geometry)' ||
> -			$funcname eq 'geometry_size' )
> -		{
> -			print "SKIPPING FUNC $id\n" if $DEBUG;
> -			next;
> -		}
> -
> -		if ( $id eq 'create_histogram2d(box3d, integer)' ||
> -			$id eq 'estimate_histogram2d(histogram2d, box)' )
> -		{
> -			print "SKIPPING FUNC $id\n" if $DEBUG;
> -			next;
> -		}
> -
> -		if ( $funcs{$id} )
> -		{
> -			print "SKIPPING PGIS FUNC $id\n" if $DEBUG;
> -			next;
> +		if (defined $obsolete_funcnames{$funcname}){
> +		    print "SKIPPING FUNCTION:  [$id] --- id on the obsolete id list"  if $DEBUG;
> +		    next DUMPLIST ;
>  		}
>  		print "KEEPING FUNCTION: [$id]\n" if $DEBUG;
> -		#next;
>  	}
>  	elsif ($line =~ / AGGREGATE (.*)\((.*)\)/)
>  	{
>  		my $name = $1;
>  		my @args = split(",", $2);
> -		for (my $i=0; $i<@args; $i++)
> +		foreach my $arg (@args)
>  		{
> -			$arg = lc($args[$i]);
> -			$arg =~ s/^ *//;
> -			$arg =~ s/ *$//;
> -			$arg =~ s/^public.//;
> -			if ( $arg eq 'opaque' ) {
> -				$args[$i] = 'internal';
> -				next;
> -			}
> -			$args[$i] = $arg;
> +		    $arg = strip_whitespace_and_public($arg);
>  		}
> -		$args = join(', ', @args);
> +		my $args = join(', ', @args);
>  		my $id = $name."(".$args.")";
>  		if ( $aggs{$id} )
>  		{
> -			print "SKIPPING PGIS AGG $id\n" if $DEBUG;
> -			next;
> -		}
> -		# This is an old postgis aggregate
> -		if ( $name eq 'fastunion' )
> -		{
> -			print "SKIPPING old PGIS AGG $id\n" if $DEBUG;
> -			next;
> +			print "SKIPPING PGIS AGG $id --- already in\n" if $DEBUG;
> +			next DUMPLIST;
>  		}
> -
>  		# This is an old postgis aggregate
> -		if ( $name eq 'mem_collect' )
> +		if ( defined $obsolete_aggregate_names{$name}  )
>  		{
> -			print "SKIPPING old PGIS AGG $id\n" if $DEBUG;
> -			next;
> +			print "SKIPPING old PGIS AGG $id --- name is on obsolete aggregate list\n" if $DEBUG;
> +			next DUMPLIST;
>  		}
> -
>  		print "KEEPING AGGREGATE [$id]\n" if $DEBUG;
> -		#next;
>  	}
>  	elsif ($line =~ / TYPE (.*) .*/)
>  	{
>  		my $type = lc($1);
> -		if ( $type eq 'wkb' )
> +		if ( $types{$type} )
>  		{
> -			print "SKIPPING PGIS TYPE $type\n" if $DEBUG;
> -			next;
> +			print "SKIPPING PGIS TYPE $type --- already included \n" if $DEBUG;
> +			next DUMPLIST;
>  		}
> -		if ( $types{$type} )
> +		if (  defined $obsolete_types{$type} )
>  		{
> -			print "SKIPPING PGIS TYPE $type\n" if $DEBUG;
> -			next;
> +			print "SKIPPING PGIS TYPE $type --- type is obsolete \n" if $DEBUG;
> +			next DUMPLIST;
>  		}
>  		print "KEEPING TYPE [$type]\n" if $DEBUG;
> -		#next;
>  	}
>  	elsif ($line =~ / PROCEDURAL LANGUAGE plpgsql/)
>  	{
>  		print "SKIPPING PROCLANG plpgsql\n" if $DEBUG;
> -		next;
> +		next DUMPLIST;
>  	}
>  
>  	# spatial_ref_sys and geometry_columns
>  	elsif ($line =~ / TABLE geometry_columns/)
>  	{
>  		#print "SKIPPING geometry_columns schema\n" if $DEBUG;
> -		#next;
>  	}
>  	elsif ($line =~ / TABLE spatial_ref_sys/)
>  	{
>  		#print "SKIPPING spatial_ref_sys schema\n" if $DEBUG;
> -		#next;
>  	}
>  
>  	elsif ($line =~ / OPERATOR CLASS *([^ ]*)/)
> @@ -434,9 +383,10 @@ while( my $line = <INPUT> )
>  
>  		if ( $opclass{$id} )
>  		{
> -			print "SKIPPING PGIS OPCLASS $id\n" if $DEBUG;
> -			next;
> +			print "SKIPPING PGIS OPCLASS $id --- already included \n" if $DEBUG;
> +			next DUMPLIST;
>  		}
> +		# no obsoletes?
>  		print "KEEPING OPCLASS [$id]\n" if $DEBUG;
>  	}
>  
> @@ -450,18 +400,27 @@ while( my $line = <INPUT> )
>  		my $id = $arg1."(".$arg2.")";
>  		if ( $fncasts{$id} )
>  		{
> -			print "SKIPPING PGIS FNCAST $id\n" if $DEBUG;
> -			next;
> +			print "SKIPPING PGIS FNCAST $id ---  already included\n" if $DEBUG;
> +			next DUMPLIST;
>  		}
>  		#if ($arg1 eq 'box3d' || $arg2 eq 'geometry')
>  		#{
>  			#print "SKIPPING PGIS FNCAST $id\n" if $DEBUG;
> -			#next;
> +			#next DUMPLIST;
>  		#}
> -		if ($arg1 eq 'wkb' || $arg2 eq 'wkb')
> +		if ( my @badtypes = grep {defined $obsolete_types{$_}} ($arg1,$arg2))
>  		{
> -			print "SKIPPING PGIS FNCAST $id\n" if $DEBUG;
> -			next;
> +		    if ($DEBUG){
> +			print "SKIPPING PGIS FNCAST $id --- type ";
> +			print join ("and type ", @badtypes);
> +			if(@badtypes>1){
> +			    print " are ";
> +			} else {
> +			    print " is ";
> +			} 
> +			print "obsoleted \n";
> +		    }
> +		    next DUMPLIST;
>  		}
>  		print "KEEPING FNCAST $id (see CAST)\n" if $DEBUG;
>  	}
> @@ -476,19 +435,33 @@ while( my $line = <INPUT> )
>  		my $id = $arg1.",".$arg2;
>  		if ( $casts{$id} )
>  		{
> -			print "SKIPPING PGIS CAST $id\n" if $DEBUG;
> -			next;
> +			print "SKIPPING PGIS CAST $id --- already included\n" if $DEBUG;
> +			next DUMPLIST;
>  		}
>  		#if ($arg1 eq 'box3d' || $arg2 eq 'geometry')
>  		#{
>  			#print "SKIPPING PGIS CAST $id\n" if $DEBUG;
> -			#next;
> +			#next DUMPLIST;
>  		#}
> -		if ($arg1 eq 'wkb' || $arg2 eq 'wkb')
> +		if ( my @badtypes = grep {defined $obsolete_types{$_}} ($arg1,$arg2))
>  		{
> -			print "SKIPPING PGIS CAST $id\n" if $DEBUG;
> -			next;
> -		}
> +		    if ($DEBUG){
> +			print "SKIPPING PGIS CAST $id --- type ";
> +			print join ("and type ", @badtypes);
> +			if(@badtypes>1){
> +			    print " are ";
> +			} else {
> +			    print " is ";
> +			} 
> +			print "obsoleted \n";
> +		    }
> +		    next DUMPLIST;
> +		}
> +# 		if ($arg1 eq 'wkb' || $arg2 eq 'wkb')
> +# 		{
> +# 			print "SKIPPING PGIS CAST $id\n" if $DEBUG;
> +# 			next DUMPLIST;
> +# 		}
>  		print "KEEPING CAST $id\n" if $DEBUG;
>  	}
>  	print OUTPUT $line;
> @@ -500,9 +473,10 @@ close(OUTPUT);
>  print "Producing ascii dump $dumpascii\n"; 
>  open( INPUT, "pg_restore -L $dumplist $dump |") || die "Can't run pg_restore\n";
>  open( OUTPUT, ">$dumpascii") || die "Can't write to $dumpascii\n";
> -while( my $line = <INPUT> )
> + ASCIIDUMP:
> +    while( my $line = <INPUT> )
>  {
> -	next if $line =~ /^ *--/;
> +	next ASCIIDUMP if $line =~ /^ *--/;
>  
>  	if ( $line =~ /^SET search_path/ )
>  	{
> @@ -539,11 +513,11 @@ while( my $line = <INPUT> )
>  		if ( $ops{$id} )
>  		{
>  			print "SKIPPING PGIS OP $id\n" if $DEBUG;
> -			next;
> +			next ASCIIDUMP;
>  		}
>  		print "KEEPING OP $id\n" if $DEBUG;
>  		print OUTPUT @sublines;
> -		next;
> +		next ASCIIDUMP;
>  	}
>  
>  	print OUTPUT $line;
> _______________________________________________
> postgis-users mailing list
> postgis-users at postgis.refractions.net
> http://postgis.refractions.net/mailman/listinfo/postgis-users
    
    
More information about the postgis-users
mailing list