[GRASS-SVN] r69696 - grass-addons/grass7/raster/r.randomforest

svn_grass at osgeo.org svn_grass at osgeo.org
Fri Oct 14 21:28:42 PDT 2016


Author: spawley
Date: 2016-10-14 21:28:42 -0700 (Fri, 14 Oct 2016)
New Revision: 69696

Modified:
   grass-addons/grass7/raster/r.randomforest/r.randomforest.py
Log:
fixed bug in loading model in r.randomforest

Modified: grass-addons/grass7/raster/r.randomforest/r.randomforest.py
===================================================================
--- grass-addons/grass7/raster/r.randomforest/r.randomforest.py	2016-10-14 10:18:18 UTC (rev 69695)
+++ grass-addons/grass7/raster/r.randomforest/r.randomforest.py	2016-10-15 04:28:42 UTC (rev 69696)
@@ -294,9 +294,12 @@
 
 #%rules
 #% exclusive: roi,loadfile
+#% exclusive: roi,load_training
 #% exclusive: save_training,load_training
 #%end
 
+
+
 # import standard modules
 import atexit, random, string, re, os
 import numpy as np
@@ -307,14 +310,35 @@
 from grass.pygrass.gis.region import Region
 from grass.pygrass.raster.buffer import Buffer
 from grass.pygrass.modules.shortcuts import imagery as im
-       
+
+
+
 def cleanup():
 
     grass.run_command("g.remove", name='clfmasktmp', flags="f",
                       type="raster", quiet=True)
 
+
+
 def sample_predictors_byrow(response, predictors):
 
+    """
+    Samples a list of GRASS rasters using a labelled raster
+    Row-by-row sampling
+    
+    Parameters
+    ----------
+    response: String; GRASS raster with labelled pixels
+    predictors: List of GRASS rasters containing explanatory variables
+
+    Returns
+    -------
+    
+    training_data: Numpy array of extracted raster values
+    training_labels: Numpy array of labels
+    
+    """
+
     # create response rasterrow and open
     roi_raster = RasterRow(response)
     roi_raster.open('r')
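
For orientation, the row-by-row sampling that sample_predictors_byrow implements looks roughly like the sketch below (raster names are placeholders, only one predictor is shown, and the response is assumed to be a CELL map so nulls appear as the CELL null value; the real function handles several predictors and map types):

import numpy as np
from grass.pygrass.raster import RasterRow
from grass.pygrass.gis.region import Region

region = Region()                       # current computational region
roi = RasterRow('landclass_roi')        # labelled response raster (placeholder name)
pred = RasterRow('elevation')           # one predictor raster (placeholder name)
roi.open('r')
pred.open('r')

labels, values = [], []
for row in range(region.rows):          # read both rasters one row at a time
    roi_row = np.array(roi[row])
    pred_row = np.array(pred[row])
    labelled = roi_row != -2147483648   # CELL null value; keep labelled pixels only
    labels.append(roi_row[labelled])
    values.append(pred_row[labelled])

roi.close()
pred.close()

training_labels = np.concatenate(labels)
training_data = np.concatenate(values)
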
@@ -401,8 +425,27 @@
     
     return(training_data, training_labels)
 
+
+
 def sample_predictors(response, predictors):
     
+    """
+    Samples a list of GRASS rasters using a labelled raster
+    Per-raster sampling (each raster is read in full)
+    
+    Parameters
+    ----------
+    response: String; GRASS raster with labelled pixels
+    predictors: List of GRASS rasters containing explanatory variables
+
+    Returns
+    -------
+    
+    training_data: Numpy array of extracted raster values
+    training_labels: Numpy array of labels
+    
+    """
+    
     # open response raster as rasterrow and read as np array
     if RasterRow(response).exist() == True:
         roi_gr = RasterRow(response)
@@ -451,12 +494,26 @@
     # return X and y data
     return(training_data[:, 0:n_features], training_data[:, n_features])
 
+
+
 def prediction(clf, predictors, class_probabilities,
-               rowincr, output, mode, labels):
+               rowincr, output, mode):
     
-    class_list = np.unique(labels)
-    nclasses = len(class_list)
+    """
+    Prediction on a list of GRASS rasters using a fitted scikit-learn model
     
+    Parameters
+    ----------
+    clf: Scikit-learn estimator object
+    predictors: List of GRASS rasters that represent the predictor variables
+    class_probabilities: Boolean of whether probabilities of each class
+        should also be predicted
+    rowincr: Integer of raster rows to process at one time
+    output: Name of GRASS raster to output classification results
+    mode: String, classification or regression mode
+    
+    """
+    
     # create a list of rasterrow objects for predictors
     n_features = len(predictors)
     rasstack = [0] * n_features
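
The heart of the prediction loop is reshaping each block of raster rows into the (n_samples, n_features) layout scikit-learn expects, predicting, and reshaping back to the block's dimensions. A minimal sketch with synthetic numbers (variable names mirror the ones used in the function; the data here is random and purely illustrative):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

rowincr, cols, n_features = 25, 100, 3

# train a throwaway model on random data just for the illustration
clf = RandomForestClassifier(n_estimators=10)
clf.fit(np.random.rand(50, n_features), np.random.randint(0, 3, 50))

# pretend this block of rows was just read from the predictor rasters
img_np_row = np.random.rand(rowincr, cols, n_features)

# flatten to (n_samples, n_features), predict, reshape back to the block
flat_pixels = img_np_row.reshape((rowincr * cols, n_features))
result = clf.predict(flat_pixels)
result = result.reshape((rowincr, cols))
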
@@ -549,7 +606,7 @@
         if class_probabilities == True and mode == 'classification':
             result_proba = clf.predict_proba(flat_pixels)
 
-            for iclass in range(nclasses):
+            for iclass in range(result_proba.shape[1]):
                 result_proba_class = result_proba[:, iclass]
                 result_proba_class = \
                     result_proba_class.reshape((rowincr, current.cols))
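
This change connects to the load-model bug noted in the log: when a classifier is loaded from file there are no training labels from which to count classes, but predict_proba already returns one probability column per class, so the class count can be taken from its output instead. A small illustration with synthetic data (not part of the module):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

X = np.random.rand(60, 4)
y = np.repeat([0, 1, 2], 20)              # three classes: 0, 1, 2

clf = RandomForestClassifier(n_estimators=10).fit(X, y)

result_proba = clf.predict_proba(X)
# one probability column per class, without needing y at prediction time
assert result_proba.shape[1] == len(clf.classes_) == 3
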
@@ -571,8 +628,26 @@
     if class_probabilities == True and mode == 'classification':
         for iclass in range(nclasses): prob[iclass].close()
 
+
+
 def shuffle_data(X, y, rstate):
 
+    """
+    Uses scikit-learn to shuffle the training data and labels together
+    
+    Parameters
+    ----------
+    X: Numpy array containing predictor values
+    y: Numpy array containing labels
+    rstate: Seed for random generator
+    
+    Returns
+    -------
+    X: Numpy array containing predictor values
+    y: Numpy array containing labels
+
+    """
+
     from sklearn.utils import shuffle
 
     # combine XY data into a single numpy array
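
shuffle_data stacks X and y into one array before shuffling and then splits them apart again; sklearn.utils.shuffle can also shuffle the two in unison directly, as in this minimal sketch with synthetic data:

import numpy as np
from sklearn.utils import shuffle

X = np.arange(20).reshape(10, 2)
y = np.arange(10)

# rows of X and entries of y stay paired after shuffling
X_shuf, y_shuf = shuffle(X, y, random_state=42)
assert X_shuf.shape == X.shape and y_shuf.shape == y.shape
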
@@ -588,12 +663,30 @@
     
     return(X, y)
 
+
+
 def cross_val_classification(clf, X, y, cv, rstate):
-    # custom function to calculate classification metrics
-    # eliminates need to calculate each metric  using cross_val_score
-    # returns: a 1D list of accuracy, kappa and auc scores
-    # returns: mean precision/recall and std precision/recall per class
+    
+    """
+    Stratified k-fold cross-validation
+    Generates several scoring metrics without repeated calls to cross_val_score
+    Also produces by-class scores
+    
+    Parameters
+    ----------
+    clf: Scikit-learn estimator object
+    X: Numpy array containing predictor values
+    y: Numpy array containing labels
+    cv: Integer of cross-validation folds
+    rstate: Seed to pass to the random number generator
+    
+    Returns
+    -------
+    cmstats: Dictionary of global accuracy measures per fold
+    byclass_metrics: Dictionary of by-class accuracy measures per fold
 
+    """
+    
     from sklearn import cross_validation, metrics
     
     class_list = np.unique(y)
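
The routine boils down to iterating StratifiedKFold splits and collecting several metrics per fold. A condensed sketch with synthetic data, using the same sklearn.cross_validation and metrics imports as the module (newer scikit-learn moved these to sklearn.model_selection):

import numpy as np
from sklearn import cross_validation, metrics
from sklearn.ensemble import RandomForestClassifier

X = np.random.rand(100, 4)
y = np.repeat([0, 1], 50)
clf = RandomForestClassifier(n_estimators=10)

skf = cross_validation.StratifiedKFold(y, n_folds=3, shuffle=True, random_state=1)
accuracy, kappa = [], []
for train_idx, test_idx in skf:
    clf.fit(X[train_idx], y[train_idx])
    y_pred = clf.predict(X[test_idx])
    accuracy.append(metrics.accuracy_score(y[test_idx], y_pred))
    kappa.append(metrics.cohen_kappa_score(y[test_idx], y_pred))
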
@@ -686,23 +779,55 @@
     return(cmstats, byclass_metrics)
 
 def save_training_data(X, y, file):
-    # append X and y and save to csv
+
+    """
+    Saves any extracted training data to a csv file
+    
+    Parameters
+    ----------
+    X: Numpy array containing predictor values
+    y: Numpy array containing labels
+    file: Path to a csv file to save data to
+
+    """
+
     training_data = np.zeros((y.shape[0], X.shape[1]+1))
     training_data[:, 0:X.shape[1]] = X
     training_data[:, X.shape[1]] = y
     np.savetxt(file, training_data, delimiter = ',')
 
+
+
 def load_training_data(file):
+    
+    """
+    Loads training data and labels from a csv file
+    
+    Parameters
+    ----------
+    file: Path to a csv file to load data from
+    
+    Returns
+    -------
+    X: Numpy array containing predictor values
+    y: Numpy array containing labels
+
+    """
+    
     training_data = np.loadtxt(file, delimiter = ',')
     n_features = training_data.shape[1]
     X = training_data[:, 0:n_features-1]
     y = training_data[:, n_features-1]
+    
     return(X, y)
 
 def main():
+    
     """
     GRASS options and flags
+    -----------------------
     """
+    
     # General options and flags
     igroup = options['igroup']
     roi = options['roi']
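
save_training_data and load_training_data amount to a plain csv round trip: the labels are appended as the last column on save and split off again on load. A minimal numpy sketch (the file path is illustrative):

import numpy as np

X = np.random.rand(5, 3)
y = np.array([0, 1, 0, 2, 1], dtype=float)

# save: labels appended as the last column
np.savetxt('/tmp/training.csv', np.column_stack((X, y)), delimiter=',')

# load: split the last column back off as the labels
data = np.loadtxt('/tmp/training.csv', delimiter=',')
X2, y2 = data[:, :-1], data[:, -1]
assert np.allclose(X, X2) and np.allclose(y, y2)
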
@@ -760,8 +885,10 @@
     else:
         mode = 'regression'
 
+
     """
     Error checking for valid input parameters
+    -----------------------------------------
     """
 
     # decision trees
@@ -813,8 +940,10 @@
     else:
         weighting = None
 
+
     """
     Obtain information about GRASS rasters to be classified
+    -------------------------------------------------------
     """
     
     # fetch individual raster names from group
@@ -835,9 +964,11 @@
         warnings.filterwarnings("ignore")
     except:
         grass.fatal("Scikit-learn python module is not installed...exiting")
-    
+
+
     """
     Sample training data using training ROI
+    ---------------------------------------
     """
     
     # load the model or training data
@@ -867,9 +998,11 @@
     if m_features_dt > n_features: m_features_dt = n_features
     if m_features_rf > n_features: m_features_rf = n_features
     if max_features_gtb > n_features: max_features_gtb = n_features
-    
+
+
     """
     Train the classifier
+    --------------------
     """
 
     # define classifier unless model is to be loaded from file
@@ -947,8 +1080,10 @@
         clf.fit(X, y)
         grass.message(_("Model built with: " + model))
         
+        
         """
         Cross Validation
+        ----------------
         """
 
         # output internal performance measures for random forests
@@ -1032,11 +1167,13 @@
         if modelonly == True:
             grass.fatal("Model built and now exiting")
 
+
     """
     Prediction on the rest of the GRASS rasters in the imagery group
+    ----------------------------------------------------------------
     """
     
-    prediction(clf, maplist, class_probabilities, rowincr, output, mode, y)
+    prediction(clf, maplist, class_probabilities, rowincr, output, mode)
     
 
 if __name__ == "__main__":


