[GRASS-user] Interfacing with Numpy

Joshua Arnott josh at snorfalorpagus.net
Thu Jul 14 10:04:52 EDT 2011


Hi everyone,

Recently I've been doing a lot of work using both GRASS databases 
(x, y, z, etc. vector data) and Numpy. To simplify things, I've written a 
Python function that extracts data from a database using db.select into 
numpy arrays, making an educated guess at the data type using 
db.describe. I'm wondering whether this would be of any use to anyone 
else? Any feedback would be appreciated.

#!/usr/bin/env python

import os
import tempfile as pytempfile
import warnings

import numpy
import grass.script as grass

def db_select_numpy(tables, sql):
    """
    Read data from a GRASS database into a dictionary of numpy arrays.

    The dtype of each array is an educated guess based on the column
    descriptions reported by db.describe for ``tables``.

    Parameters
    ----------
    tables : str or sequence of str
        Name(s) of the table(s) the query reads from; used only to look
        up column types. If the same column name occurs in several
        tables, the last description wins and a warning is issued.
    sql : str
        The SELECT statement passed to db.select.

    Returns
    -------
    dict
        Maps each column name in the query result to a 1-D numpy array
        with one entry per result row. An empty dict is returned when
        db.select produces no output at all.

    Raises
    ------
    NameError
        If a result column is not described by any of ``tables``.
    KeyError
        If a column has a database type other than DOUBLE PRECISION,
        INTEGER or CHARACTER.

    Notes
    -----
    Empty cells in numeric columns trigger a warning; they are stored as
    NaN in floating point columns and as 0 in integer columns.

    example:

    >>> data = db_select_numpy('gpspoints', 'SELECT x,y,z FROM gpspoints')
    >>> print(len(data['x']), data['x'].dtype)
    23 float64
    """
    # fetch dtypes from table descriptions
    if isinstance(tables, str):
        tables = [tables]
    cols = {}
    for table in tables:
        description = grass.db.db_describe(table)
        # each entry is indexed as [name, type, length]
        for col in description['cols']:
            name = col[0]
            if name in cols:
                warnings.warn(name + ' overwriting previous instance')
            cols[name] = {'type': col[1], 'len': col[2]}

    # dtypes translations (builtin int replaces the removed numpy.int alias)
    dtypes = {
        'DOUBLE PRECISION': numpy.float64,
        'INTEGER': int,
        'CHARACTER': 'S',
    }

    # query database: db.select writes to a named file which is read back
    # in text mode. delete=False keeps the path valid after closing our
    # own handle; the file is removed explicitly once it has been read.
    ofile = pytempfile.NamedTemporaryFile(mode='w+b', delete=False)
    ofile.close()
    try:
        grass.core.run_command('db.select', overwrite=True, quiet=True,
                               sql=sql, output=ofile.name)
        with open(ofile.name) as result:
            database = result.readlines()
    finally:
        os.remove(ofile.name)

    data = {}
    if not database:
        # no header line, hence no columns to report
        return data

    # allocate zero-initialised numpy arrays so that skipped cells never
    # expose uninitialised memory
    columns = database[0].rstrip('\r\n').split('|')
    n_rows = len(database) - 1
    for name in columns:
        if name not in cols:
            raise NameError('column "' + name + '" not in specified tables')
        d = dtypes[cols[name]['type']]
        if d == 'S':
            d = 'S' + str(cols[name]['len'])  # e.g., 'S5'
        data[name] = numpy.zeros([n_rows], dtype=d)

    # copy data into numpy arrays
    for row, line in enumerate(database[1:]):
        elements = line.rstrip('\r\n').split('|')
        for m, name in enumerate(columns):
            column = data[name]
            value = elements[m]
            # byte-string columns accept empty values; numeric ones do not
            if column.dtype.kind == 'S' or len(value) > 0:
                column[row] = value
            else:
                warnings.warn('numeric cells cannot be empty')
                if column.dtype.kind == 'f':
                    column[row] = numpy.nan
    return data


More information about the grass-user mailing list