[GRASS-SVN] r70994 - grass-addons/grass7/raster/r.learn.ml

svn_grass at osgeo.org
Mon May 1 14:08:41 PDT 2017


Author: spawley
Date: 2017-05-01 14:08:41 -0700 (Mon, 01 May 2017)
New Revision: 70994

Modified:
   grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py
   grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py
Log:
r.learn.ml: added an option to save cross-validation predictions to a text file

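For context, the new preds_file option produces one row per training sample containing the observed class, the cross-validated prediction, the fold index and the sample's x/y coordinates. A minimal sketch (not part of the commit; the file name is hypothetical) of summarising that table with pandas:

    import pandas as pd

    # columns as written by r.learn.ml: y_true, y_pred, fold, x, y
    # (the DataFrame index is stored in the first csv column)
    preds = pd.read_csv('cv_preds.csv', index_col=0)

    # overall and per-fold agreement between observed and predicted classes
    overall_acc = (preds['y_true'] == preds['y_pred']).mean()
    fold_acc = preds.groupby('fold').apply(
        lambda f: (f['y_true'] == f['y_pred']).mean())

    print('overall accuracy: {0:.3f}'.format(overall_acc))
    print(fold_acc)
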
Modified: grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py	2017-05-01 18:20:07 UTC (rev 70993)
+++ grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py	2017-05-01 21:08:41 UTC (rev 70994)
@@ -267,6 +267,13 @@
 #%end
 
 #%option G_OPT_F_OUTPUT
+#% key: preds_file
+#% label: Save cross-validation predictions to csv
+#% required: no
+#% guisection: Cross validation
+#%end
+
+#%option G_OPT_F_OUTPUT
 #% key: fimp_file
 #% label: Save feature importances to csv
 #% required: no
@@ -472,6 +479,7 @@
     importances = flags['f']
     n_permutations = int(options['n_permutations'])
     errors_file = options['errors_file']
+    preds_file = options['preds_file']
     fimp_file = options['fimp_file']
     param_file = options['param_file']
 
@@ -573,7 +581,7 @@
 
         # Sample training data and group id
         if load_training != '':
-            X, y, group_id = load_training_data(load_training)
+            X, y, group_id, sample_coords = load_training_data(load_training)
         else:
             gscript.message('Extracting training data')
 
@@ -642,7 +650,7 @@
 
             # optionally save extracted data to .csv file
             if save_training != '':
-                save_training_data(X, y, group_id, save_training)
+                save_training_data(X, y, group_id, sample_coords, save_training)
 
         # ---------------------------------------------------------------------
         # define the inner search resampling method
@@ -792,9 +800,10 @@
                     scoring.append('matthews_corrcoef')
 
                 # perform the cross-validation
-                scores, cscores, fimp, models = cross_val_scores(
+                scores, cscores, fimp, models, preds = cross_val_scores(
                     clf, X, y, group_id, class_weights, outer, scoring,
                     importances, n_permutations, predict_resamples, random_state)
+                preds = np.hstack((preds, sample_coords))
 
                 # global scores
                 for method, val in scores.iteritems():
@@ -822,6 +831,15 @@
                     errors = pd.DataFrame(scores)
                     errors.to_csv(errors_file, mode='w')
 
+                # write cross-validation predictions to csv file
+                if preds_file != '':
+                    preds = pd.DataFrame(preds)
+                    preds.columns = ['y_true', 'y_pred', 'fold', 'x', 'y']
+                    preds.to_csv(preds_file, mode='w')
+                    text_file = open(preds_file + 't', "w")
+                    text_file.write('"Integer","Real","Real","integer","Real","Real"')
+                    text_file.close()
+
                 # feature importances
                 if importances is True:
                     gscript.message(os.linesep)

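The companion file written alongside preds_file (same name with a trailing 't') follows the OGR .csvt convention for declaring csv column types, so the predictions can also be brought into GIS software as points via their x/y columns. As an illustration only (not part of the commit; file name hypothetical), the misclassified samples could be mapped directly with matplotlib:

    import pandas as pd
    import matplotlib.pyplot as plt

    preds = pd.read_csv('cv_preds.csv', index_col=0)
    wrong = preds[preds['y_true'] != preds['y_pred']]

    # all samples in grey, misclassified samples overplotted in red
    plt.scatter(preds['x'], preds['y'], c='0.8', s=5, label='all samples')
    plt.scatter(wrong['x'], wrong['y'], c='r', s=15, label='misclassified')
    plt.legend()
    plt.gca().set_aspect('equal')
    plt.show()
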
Modified: grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py	2017-05-01 18:20:07 UTC (rev 70993)
+++ grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py	2017-05-01 21:08:41 UTC (rev 70994)
@@ -14,7 +14,7 @@
 from grass.pygrass.modules.shortcuts import imagery as im
 from grass.pygrass.vector import VectorTopo
 from grass.pygrass.vector.table import Link
-from grass.pygrass.utils import get_raster_for_points
+from grass.pygrass.utils import get_raster_for_points, pixel2coor
 import grass.script as gscript
 from subprocess import PIPE
 
@@ -199,7 +199,11 @@
     else:
         k_fold = cv.split(X, y, groups=groups)
 
+    # store predictions and indices
+    predictions = np.zeros((len(y), 3)) # y_true, y_pred, fold
+
     # train on k-1 folds and test on the remaining fold
+    fold = 0
     for train_indices, test_indices in k_fold:
 
         # create training and test folds
@@ -224,6 +228,9 @@
 
         # prediction of test fold
         y_pred = estimator.predict(X_test)
+        predictions[test_indices, 0] = y_test
+        predictions[test_indices, 1] = y_pred
+        predictions[test_indices, 2] = fold
 
         # calculate global performance metrics
         for m in scores.keys():
@@ -271,8 +278,9 @@
                         estimator, X_test, y_test,
                         n_permutations, scoring_methods[scoring[0]],
                         random_state)))
+        fold += 1
 
-    return(scores, byclass_scores, fimp, fitted_models)
+    return(scores, byclass_scores, fimp, fitted_models, predictions)
 
 
 def predict(estimator, predictors, output, predict_type='raw',
@@ -588,7 +596,7 @@
     return (clf, mode)
 
 
-def save_training_data(X, y, groups, file):
+def save_training_data(X, y, groups, coords, file):
     """
     Saves any extracted training data to a csv file
 
@@ -597,6 +605,7 @@
     X: Numpy array containing predictor values
     y: Numpy array containing labels
     groups: Numpy array of group labels
+    coords: Numpy array containing xy coordinates of samples
     file: Path to a csv file to save data to
     """
 
@@ -605,7 +614,7 @@
         groups = np.empty((y.shape[0]))
         groups[:] = np.nan
 
-    training_data = np.column_stack([X, y, groups])
+    training_data = np.column_stack([coords, X, y, groups])
     np.savetxt(file, training_data, delimiter=',')
 
 
@@ -622,6 +631,7 @@
     X: Numpy array containing predictor values
     y: Numpy array containing labels
     groups: Numpy array of group labels, or None
+    coords: Numpy array containing x,y coordinates of samples
     """
 
     training_data = np.loadtxt(file, delimiter=',')
@@ -636,10 +646,11 @@
         groups = None
 
     # fetch X and y
-    X = training_data[:, 0:last_Xcol]
+    coords = training_data[:, 0:2]
+    X = training_data[:, 2:last_Xcol]
     y = training_data[:, -2]
 
-    return(X, y, groups)
+    return(X, y, groups, coords)
 
 
 def extract(response, predictors, lowmem=False):
@@ -727,6 +738,8 @@
 
     # convert indexes of training pixels from tuple to n*2 np array
     is_train = np.array(is_train).T
+    for i in range(is_train.shape[0]):
+        is_train[i, :] = np.array(pixel2coor(tuple(is_train[i]), current))
 
     # close the response map
     roi_gr.close()

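The change to cross_val_scores in r_learn_utils.py follows a simple bookkeeping pattern: a preallocated (n_samples, 3) array is filled fold by fold with the observed label, the prediction and the fold number, indexed by each fold's test indices. A standalone sketch of the same pattern using scikit-learn directly (illustrative only, not the module's code):

    import numpy as np
    from sklearn.model_selection import KFold
    from sklearn.tree import DecisionTreeClassifier

    X = np.random.rand(100, 4)
    y = np.random.randint(0, 3, 100)

    predictions = np.zeros((len(y), 3))  # y_true, y_pred, fold
    fold = 0
    for train_idx, test_idx in KFold(n_splits=3, shuffle=True).split(X, y):
        clf = DecisionTreeClassifier().fit(X[train_idx], y[train_idx])
        predictions[test_idx, 0] = y[test_idx]
        predictions[test_idx, 1] = clf.predict(X[test_idx])
        predictions[test_idx, 2] = fold
        fold += 1

    # predictions now holds one out-of-fold prediction per sample
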

