[GRASS-SVN] r71016 - grass-addons/grass7/raster/r.learn.ml
svn_grass at osgeo.org
svn_grass at osgeo.org
Wed May 3 13:52:06 PDT 2017
Author: spawley
Date: 2017-05-03 13:52:05 -0700 (Wed, 03 May 2017)
New Revision: 71016
Modified:
grass-addons/grass7/raster/r.learn.ml/r.learn.ml.html
grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py
grass-addons/grass7/raster/r.learn.ml/rlearn_rasters.py
Log:
r.learn.ml updated raster prediction to use multiprocessing
Modified: grass-addons/grass7/raster/r.learn.ml/r.learn.ml.html
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/r.learn.ml.html 2017-05-03 15:42:48 UTC (rev 71015)
+++ grass-addons/grass7/raster/r.learn.ml/r.learn.ml.html 2017-05-03 20:52:05 UTC (rev 71016)
@@ -78,7 +78,7 @@
<p>Then we can use these training pixels to perform a classification on the more recently obtained landsat 7 image:</p>
<div class="code"><pre>
r.learn.ml group=lsat7_2000 trainingmap=landclass96_roi output=rf_classification \
- classifier=RandomForestClassifier n_estimators=500 max_features=-1 min_samples_split=2 randst=1 lines=25
+ classifier=RandomForestClassifier n_estimators=500 randst=1 lines=25
# copy category labels from landclass training map to result
r.category rf_classification raster=landclass96_roi
Modified: grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py 2017-05-03 15:42:48 UTC (rev 71015)
+++ grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py 2017-05-03 20:52:05 UTC (rev 71016)
@@ -889,27 +889,27 @@
if prob_only is False:
gscript.message('Predicting classification/regression raster...')
predict(estimator=clf, predictors=maplist, output=output,
- predict_type='raw', rowincr=rowincr)
+ predict_type='raw', rowincr=rowincr, n_jobs=n_jobs)
if predict_resamples is True:
for i in range(cv):
resample_name = output + '_Resample' + str(i)
predict(estimator=models[i], predictors=maplist,
output=resample_name, predict_type='raw',
- rowincr=rowincr)
+ rowincr=rowincr, n_jobs=n_jobs)
# predict class probabilities
if probability is True:
gscript.message('Predicting class probabilities...')
predict(estimator=clf, predictors=maplist, output=output, predict_type='prob',
- index=indexes, rowincr=rowincr)
+ index=indexes, rowincr=rowincr, n_jobs=n_jobs)
if predict_resamples is True:
for i in range(cv):
resample_name = output + '_Resample' + str(i)
predict(estimator=models[i], predictors=maplist,
output=resample_name, predict_type='prob',
- index=indexes, rowincr=rowincr)
+ index=indexes, rowincr=rowincr, n_jobs=n_jobs)
else:
gscript.message("Model built and now exiting")
Modified: grass-addons/grass7/raster/r.learn.ml/rlearn_rasters.py
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/rlearn_rasters.py 2017-05-03 15:42:48 UTC (rev 71015)
+++ grass-addons/grass7/raster/r.learn.ml/rlearn_rasters.py 2017-05-03 20:52:05 UTC (rev 71016)
@@ -166,7 +166,7 @@
def predict(estimator, predictors, output, predict_type='raw',
- index=None, rowincr=25):
+ index=None, rowincr=25, n_jobs=-2):
"""
Prediction on list of GRASS rasters using a fitted scikit learn model
@@ -179,8 +179,11 @@
'prob' for class probabilities
index: Optional, list of class indices to export
rowincr: Integer of raster rows to process at one time
+ n_jobs: Number of processing cores
"""
+ from sklearn.externals.joblib import Parallel, delayed
+
# convert potential single index to list
if isinstance(index, int): index = [index]
@@ -198,92 +201,69 @@
" does not exist.... exiting")
# -------------------------------------------------------------------------
- # Prediction using blocks of rows per iteration
+ # parallel prediction
# -------------------------------------------------------------------------
- for rowblock in range(0, current.rows, rowincr):
- gscript.percent(rowblock, current.rows, rowincr)
+ # create lists of row increments
+ row_mins, row_maxs = [], []
+ for row in range(0, current.rows, rowincr):
+ if row+rowincr > current.rows:
+ rowincr = current.rows - row
+ row_mins.append(row)
+ row_maxs.append(row+rowincr)
- # check that the row increment does not exceed the number of rows
- if rowblock+rowincr > current.rows:
- rowincr = current.rows - rowblock
- img_np_row = np.zeros((rowincr, current.cols, n_features))
+ # perform predictions on lists of row increments in parallel
+ predictions = Parallel(n_jobs=n_jobs)(
+ delayed(__predict_parallel)
+ (estimator, predictors, predict_type, current, row_min, row_max)
+ for row_min, row_max in zip(row_mins, row_maxs))
- # loop through each row, and each band and add to 2D img_np_row
- for row in range(rowblock, rowblock+rowincr, 1):
- for band in range(n_features):
- img_np_row[row-rowblock, :, band] = \
- np.array(rasstack[band][row])
+ # unpack the results
+ results = []
+ ftypes = []
+ for block in predictions:
+ results.append(block[0])
+ ftypes.append(block[1])
- # create mask
- img_np_row[img_np_row == -2147483648] = np.nan
- mask = np.zeros((img_np_row.shape[0], img_np_row.shape[1]))
- for feature in range(n_features):
- invalid_indexes = np.nonzero(np.isnan(img_np_row[:, :, feature]))
- mask[invalid_indexes] = np.nan
+ # -------------------------------------------------------------------------
+ # writing of predicted results for classification
+ # -------------------------------------------------------------------------
+ if predict_type == 'raw':
+ classification = RasterRow(output)
+ classification.open('w', ftypes[0], overwrite=True)
- # reshape each row-band matrix into a n*m array
- nsamples = rowincr * current.cols
- flat_pixels = img_np_row.reshape((nsamples, n_features))
-
- # remove NaNs prior to passing to scikit-learn predict
- flat_pixels = np.nan_to_num(flat_pixels)
-
- # perform prediction for classification/regression
- if predict_type == 'raw':
- result = estimator.predict(flat_pixels)
- result = result.reshape((rowincr, current.cols))
-
- # determine nodata value and grass raster type
- if result.dtype == 'float':
- nodata = np.nan
- ftype = 'FCELL'
- else:
- nodata = -2147483648
- ftype = 'CELL'
-
- # replace NaN values so that the prediction does not have a border
- result[np.nonzero(np.isnan(mask))] = nodata
-
- # on first iteration create the RasterRow object
- if rowblock == 0:
- if predict_type == 'raw':
- classification = RasterRow(output)
- classification.open('w', ftype, overwrite=True)
-
- # write the classification result
- for row in range(rowincr):
- newrow = Buffer((result.shape[1],), mtype=ftype)
- newrow[:] = result[row, :]
+ # write the classification result
+ for result_block in results:
+ for row in range(result_block.shape[0]):
+ newrow = Buffer((result_block.shape[1],), mtype=ftypes[0])
+ newrow[:] = result_block[row, :]
classification.put_row(newrow)
- # perform prediction for class probabilities
- if predict_type == 'prob':
- result_proba = estimator.predict_proba(flat_pixels)
+ # -------------------------------------------------------------------------
+ # writing of predicted results for probabilities
+ # -------------------------------------------------------------------------
+ if predict_type == 'prob':
+ # determine number of classes
+ if index is None:
+ index = range(results[0].shape[2])
+ n_classes = len(index)
+ else:
+ n_classes = len(np.unique(index))
- # on first loop determine number of probability classes
- # and open rasterrow objects for writing
- if rowblock == 0:
- if index is None:
- index = range(result_proba.shape[1])
- n_classes = len(index)
- else:
- n_classes = len(np.unique(index))
+ # create and open RasterRow objects for probabilities
+ prob_out_raster = [0] * n_classes
+ prob = [0] * n_classes
+ for iclass, label in enumerate(index):
+ prob_out_raster[iclass] = output + '_classPr' + str(label)
+ prob[iclass] = RasterRow(prob_out_raster[iclass])
+ prob[iclass].open('w', 'FCELL', overwrite=True)
- # create and open RasterRow objects for probabilities
- prob_out_raster = [0] * n_classes
- prob = [0] * n_classes
- for iclass, label in enumerate(index):
- prob_out_raster[iclass] = output + '_classPr' + str(label)
- prob[iclass] = RasterRow(prob_out_raster[iclass])
- prob[iclass].open('w', 'FCELL', overwrite=True)
-
+ # write the class probability results
+ for results_proba_block in results:
for iclass, label in enumerate(index):
- result_proba_class = result_proba[:, label]
- result_proba_class = result_proba_class.reshape((rowincr, current.cols))
- result_proba_class[np.nonzero(np.isnan(mask))] = np.nan
+ result_proba_class = results_proba_block[:, :, label]
- for row in range(rowincr):
+ for row in range(result_proba_class.shape[0]):
newrow = Buffer((result_proba_class.shape[1],), mtype='FCELL')
newrow[:] = result_proba_class[row, :]
prob[iclass].put_row(newrow)
@@ -291,11 +271,94 @@
# -------------------------------------------------------------------------
# close all maps
# -------------------------------------------------------------------------
- for i in range(n_features): rasstack[i].close()
if predict_type == 'raw': classification.close()
if predict_type == 'prob':
try:
for iclass in range(n_classes):
prob[iclass].close()
except:
- pass
\ No newline at end of file
+ pass
+
+
+def __predict_parallel(estimator, predictors, predict_type, current, row_min, row_max):
+ """
+ Performs prediction on range of rows in grass rasters
+
+ Args
+ ----
+ estimator: scikit-learn estimator object
+ predictors: list of GRASS rasters
+ predict_type: character, 'raw' for classification/regression;
+ 'prob' for class probabilities
+ current: current region settings
+ row_min, row_max: Range of rows of grass rasters to perform predictions
+
+ Returns
+ -------
+ result: 2D (classification) or 3D numpy array (class probabilities) of predictions
+ ftypes: data storage type
+ """
+
+ # initialize output
+ result, ftype, mask = None, None, None
+
+ # open grass rasters
+ n_features = len(predictors)
+ rasstack = [0] * n_features
+
+ for i in range(n_features):
+ rasstack[i] = RasterRow(predictors[i])
+ if rasstack[i].exist() is True:
+ rasstack[i].open('r')
+ else:
+ gscript.fatal("GRASS raster " + predictors[i] +
+ " does not exist.... exiting")
+
+ # loop through each row, and each band and add to 2D img_np_row
+ img_np_row = np.zeros((row_max-row_min, current.cols, n_features))
+ for row in range(row_min, row_max):
+ for band in range(n_features):
+ img_np_row[row-row_min, :, band] = np.array(rasstack[band][row])
+
+ # create mask
+ img_np_row[img_np_row == -2147483648] = np.nan
+ mask = np.zeros((img_np_row.shape[0], img_np_row.shape[1]))
+ for feature in range(n_features):
+ invalid_indexes = np.nonzero(np.isnan(img_np_row[:, :, feature]))
+ mask[invalid_indexes] = np.nan
+
+ # reshape each row-band matrix into a n*m array
+ nsamples = (row_max-row_min) * current.cols
+ flat_pixels = img_np_row.reshape((nsamples, n_features))
+
+ # remove NaNs prior to passing to scikit-learn predict
+ flat_pixels = np.nan_to_num(flat_pixels)
+
+ # perform prediction for classification/regression
+ if predict_type == 'raw':
+ result = estimator.predict(flat_pixels)
+ result = result.reshape((row_max-row_min, current.cols))
+
+ # determine nodata value and grass raster type
+ if result.dtype == 'float':
+ nodata = np.nan
+ ftype = 'FCELL'
+ else:
+ nodata = -2147483648
+ ftype = 'CELL'
+
+ # replace NaN values so that the prediction does not have a border
+ result[np.nonzero(np.isnan(mask))] = nodata
+
+ # perform prediction for class probabilities
+ if predict_type == 'prob':
+ result = estimator.predict_proba(flat_pixels)
+ result = result.reshape((row_max-row_min, current.cols, result.shape[1]))
+ result[np.nonzero(np.isnan(mask))] = np.nan
+
+ # close maps
+ for i in range(n_features):
+ rasstack[i].close()
+
+ return (result, ftype)
+
More information about the grass-commit mailing list