[gdal-dev] GeoPDF translation

jsschmidt jschmidt at fs.fed.us
Mon Aug 12 15:09:17 PDT 2013


Here's a Python3.3 script to convert multiple historic USGS GeoPDF's to
GeoTiffs in a UTM10, NAD83 map projection at 300 dpi. Intermediate Geotiffs
are output in the native USGS map projection - polyconic - then converted to
GeoTiffs in UTM.  Script requires access to the GDAL utilities, which I
obtained by installing MS4W and running the script from the MS4W command
prompt.
Modify the script to point to the directory where your historic GeoPDFs are
stored, to change the output map projection, and to modify the name of the
output GeoTiff files. (Script currently assumes PDF files begin with 'CA'
and final output files will be named CA_UTM....tif.)









#Python 3.3 script to convert historic USGS PDF maps to GeoTiff format in
UTM coordinates.
#Relies on GDAL (Geospatial Data Abstraction Library)
import glob, os
import subprocess
import pyproj

def GetLatLon(line):
    coords = line.split(') (')[1]
    coords = coords[:-1]
    LonStr, LatStr = coords.split(',')
    # Longitude
    LonStr = LonStr.split('d')    # Get the degrees, and the rest
    LonD = int(LonStr[0])
    LonStr = LonStr[1].split('\'')# Get the arc-m, and the rest
    LonM = int(LonStr[0])
    LonStr = LonStr[1].split('"') # Get the arc-s, and the rest
    LonS = float(LonStr[0])
    Lon = LonD + LonM/60. + LonS/3600.
    if LonStr[1] in ['W', 'w']:
        Lon = -1*Lon
    # Same for Latitude
    LatStr = LatStr.split('d')
    LatD = int(LatStr[0])
    LatStr = LatStr[1].split('\'')
    LatM = int(LatStr[0])
    LatStr = LatStr[1].split('"')
    LatS = float(LatStr[0])
    Lat = LatD + LatM/60. + LatS/3600.
    if LatStr[1] in ['S', 's']:
        Lat = -1*Lat
    return Lat, Lon

EPSGin = "EPSG:4267"                   # Lat-Lon, NAD27 - Assumed coordinate
system of lat-lon corner coordinates as defined by EPSG code
EPSGout = "EPSG:26910"                 # UTM10, NAD83 - Output coordinate
system for Geotiff files as defined by EPSG code
#EPSGout = "EPSG:26911"                 # UTM11, NAD83 - Output coordinate
system for Geotiff files as defined by EPSG code

dirname = "C:\\GeoPDF\\"  # Location of input pdf files Also used for output
directory
print(dirname)

p1=pyproj.Proj(init= EPSGin)           # Define input projection
p2=pyproj.Proj(init= EPSGout)          # Define output projection

list_of_files1 = glob.glob(dirname +'*.pdf')           # create the list of
PDF files


for file_name in list_of_files1:
  os.rename(file_name, file_name.replace(" ", "_"))    #replace any blanks
in PDF filenames with underscore character

list_of_files2 = glob.glob(dirname +'*.pdf')           # create the list of
renamed PDF files

for file_name in list_of_files2:

  FIPDF = file_name #Input GeoPDF filename
  FOTIFF1 = FIPDF.replace('pdf', 'tif') # Output Filename for intermediate
GeoTiff file in original map coordinates
  FOTIFF2 = FOTIFF1.replace('CA_', 'CA_UTM_') # Output Filename for final
GeoTiff file in UTM Coordinates
  print(FIPDF)
  print(FOTIFF1)
  print(FOTIFF2)

# Export geoPDF to Geotiff at 300 dpi in original map projection coordinates
(polyconic for USGS historic maps)
# Use JPEG Compression in the GeoTiff

  os.system('gdal_translate.exe  -of GTiff --config GDAL_PDF_DPI 300 -co
COMPRESS=JPEG ' + FIPDF + ' '+ FOTIFF1)

  CornerLats = [0,0,0,0]
  CornerLons = [0,0,0,0]
  Cornerx = [0,0,0,0]
  Cornery = [0,0,0,0]
  gcp = ['','','','']
  xstr = ['','','','']
  ystr = ['','','','']

# Run GDALinfo to read the header information from each intermediate GeoTiff
file

  GdalInfo = subprocess.check_output('gdalinfo
{}'.format(FOTIFF1),shell=True,universal_newlines=True)# Runs Gdalinfo on
exported GeoTiff
  GdalInfo = GdalInfo.splitlines() # Creates a line by line list.
  GotUL, GotUR, GotLL, GotLR, GotSize, GotPixel, Proj, NeedReproj = False,
False, False, False, False, False, False, True
  
  for line in GdalInfo:
        print(line)  
        if line.startswith("Size"):
# Get the row-column size of the GeoTiff 
            size = line.split()
            R = size[3]
            size2 = size[2].split(',')
            C = size2[0]
            CornerRow = ['0', R, '0', R]
            CornerCol = ['0', '0', C, C]
            GotSize = True
        
        if line.startswith("Pixel Size"):
# Calculate the size of each pixel in map units (assumed to be meters)
            pix1 = line.split('(')
            pix2 = pix1[1].split(',')
            pixsize = float(pix2[0])
            halfpix = pixsize/2 # Adjustment to move coordinates to cell
center from edge
            GotPixel = True
             
        if line.startswith("Upper Left"):
            CornerLats[0], CornerLons[0] = GetLatLon(line)
            Cornerx[0], Cornery[0] =
pyproj.transform(p1,p2,CornerLons[0],CornerLats[0]) #Reproject edge coords
in UL to output Map proj
            Cornerx[0] = Cornerx[0] - halfpix # Adjustment to pixel edge
            Cornery[0] = Cornery[0] + halfpix 
            GotUL = True
        
        if line.startswith("Lower Left"):
            CornerLats[1], CornerLons[1] = GetLatLon(line)
            Cornerx[1], Cornery[1] =
pyproj.transform(p1,p2,CornerLons[1],CornerLats[1]) #Reproject edge coords
in LL to output Map proj
            Cornerx[1] = Cornerx[1] - halfpix
            Cornery[1] = Cornery[1] - halfpix 
            GotLL = True

        if line.startswith("Upper Right"):
            CornerLats[2], CornerLons[2] = GetLatLon(line)
            Cornerx[2], Cornery[2] =
pyproj.transform(p1,p2,CornerLons[2],CornerLats[2]) #Reproject edge coords
in UR to output Map proj
            Cornerx[2] = Cornerx[2] + halfpix
            Cornery[2] = Cornery[2] + halfpix 
            GotUR = True

        if line.startswith("Lower Right"):
            CornerLats[3], CornerLons[3] = GetLatLon(line)
            Cornerx[3], Cornery[3] =
pyproj.transform(p1,p2,CornerLons[3],CornerLats[3]) #Reproject edge coords
in LR to output Map proj
            Cornerx[3] = Cornerx[3] + halfpix
            Cornery[3] = Cornery[3] - halfpix 
            GotLR = True

        if GotUL and GotUR and GotLL and GotLR and GotSize and GotPixel and
NeedReproj:
            xstr[0] = str(Cornerx[0])
            xstr[1] = str(Cornerx[1])
            xstr[2] = str(Cornerx[2])
            xstr[3] = str(Cornerx[3])
            ystr[0] = str(Cornery[0])
            ystr[1] = str(Cornery[1])
            ystr[2] = str(Cornery[2])
            ystr[3] = str(Cornery[3])

            gcp[0] = ' -gcp '+ CornerCol[0]+ ' ' + CornerRow[0]+ ' ' +
xstr[0]+ ' ' + ystr[0]
            gcp[1] = ' -gcp '+ CornerCol[1]+ ' ' + CornerRow[1]+ ' ' +
xstr[1]+ ' ' + ystr[1]
            gcp[2] = ' -gcp '+ CornerCol[2]+ ' ' + CornerRow[2]+ ' ' +
xstr[2]+ ' ' + ystr[2]
            gcp[3] = ' -gcp '+ CornerCol[3]+ ' ' + CornerRow[3]+ ' ' +
xstr[3]+ ' ' + ystr[3]

            # Reproject to new geotiff file using control points
 
            gdalstr = 'gdal_translate.exe -of GTIFF -co COMPRESS=JPEG ' +
FOTIFF1 + gcp[0] + gcp[1] + gcp[2] + gcp[3]+ ' -a_srs ' + EPSGout + ' '+
FOTIFF2
            print(gdalstr) 
            os.system(gdalstr)
            NeedReproj = False
            Proj = True

        if GotUL and GotUR and GotLL and GotLR and GotSize and GotPixel and
Proj:
            break

         
           
  







--
View this message in context: http://osgeo-org.1560.x6.nabble.com/gdal-dev-GeoPDF-translation-tp3742508p5072151.html
Sent from the GDAL - Dev mailing list archive at Nabble.com.


More information about the gdal-dev mailing list