[GRASS-SVN] r70994 - grass-addons/grass7/raster/r.learn.ml
svn_grass at osgeo.org
svn_grass at osgeo.org
Mon May 1 14:08:41 PDT 2017
Author: spawley
Date: 2017-05-01 14:08:41 -0700 (Mon, 01 May 2017)
New Revision: 70994
Modified:
grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py
grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py
Log:
r.learn.ml: added an option to save cross-validation predictions to a text file
Modified: grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py 2017-05-01 18:20:07 UTC (rev 70993)
+++ grass-addons/grass7/raster/r.learn.ml/r.learn.ml.py 2017-05-01 21:08:41 UTC (rev 70994)
@@ -267,6 +267,13 @@
#%end
#%option G_OPT_F_OUTPUT
+#% key: preds_file
+#% label: Save cross-validation predictions to csv
+#% required: no
+#% guisection: Cross validation
+#%end
+
+#%option G_OPT_F_OUTPUT
#% key: fimp_file
#% label: Save feature importances to csv
#% required: no
@@ -472,6 +479,7 @@
importances = flags['f']
n_permutations = int(options['n_permutations'])
errors_file = options['errors_file']
+ preds_file = options['preds_file']
fimp_file = options['fimp_file']
param_file = options['param_file']
@@ -573,7 +581,7 @@
# Sample training data and group id
if load_training != '':
- X, y, group_id = load_training_data(load_training)
+ X, y, group_id, sample_coords = load_training_data(load_training)
else:
gscript.message('Extracting training data')
@@ -642,7 +650,7 @@
# optionally save extracted data to .csv file
if save_training != '':
- save_training_data(X, y, group_id, save_training)
+ save_training_data(X, y, group_id, sample_coords, save_training)
# ---------------------------------------------------------------------
# define the inner search resampling method
@@ -792,9 +800,10 @@
scoring.append('matthews_corrcoef')
# perform the cross-validatation
- scores, cscores, fimp, models = cross_val_scores(
+ scores, cscores, fimp, models, preds = cross_val_scores(
clf, X, y, group_id, class_weights, outer, scoring,
importances, n_permutations, predict_resamples, random_state)
+ preds = np.hstack((preds, sample_coords))
# global scores
for method, val in scores.iteritems():
@@ -822,6 +831,15 @@
errors = pd.DataFrame(scores)
errors.to_csv(errors_file, mode='w')
+ # write cross-validation predictions to csv file
+ if preds_file != '':
+ preds = pd.DataFrame(preds)
+ preds.columns = ['y_true', 'y_pred', 'fold', 'x', 'y']
+ preds.to_csv(preds_file, mode='w')
+ text_file = open(preds_file + 't', "w")
+ text_file.write('"Integer","Real","Real","integer","Real","Real"')
+ text_file.close()
+
# feature importances
if importances is True:
gscript.message(os.linesep)
Modified: grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py 2017-05-01 18:20:07 UTC (rev 70993)
+++ grass-addons/grass7/raster/r.learn.ml/r_learn_utils.py 2017-05-01 21:08:41 UTC (rev 70994)
@@ -14,7 +14,7 @@
from grass.pygrass.modules.shortcuts import imagery as im
from grass.pygrass.vector import VectorTopo
from grass.pygrass.vector.table import Link
-from grass.pygrass.utils import get_raster_for_points
+from grass.pygrass.utils import get_raster_for_points, pixel2coor
import grass.script as gscript
from subprocess import PIPE
@@ -199,7 +199,11 @@
else:
k_fold = cv.split(X, y, groups=groups)
+ # store predictions and indices
+ predictions = np.zeros((len(y), 3)) # y_true, y_pred, fold
+
# train on k-1 folds and test of k folds
+ fold = 0
for train_indices, test_indices in k_fold:
# create training and test folds
@@ -224,6 +228,9 @@
# prediction of test fold
y_pred = estimator.predict(X_test)
+ predictions[test_indices, 0] = y_test
+ predictions[test_indices, 1] = y_pred
+ predictions[test_indices, 2] = fold
# calculate global performance metrics
for m in scores.keys():
@@ -271,8 +278,9 @@
estimator, X_test, y_test,
n_permutations, scoring_methods[scoring[0]],
random_state)))
+ fold += 1
- return(scores, byclass_scores, fimp, fitted_models)
+ return(scores, byclass_scores, fimp, fitted_models, predictions)
def predict(estimator, predictors, output, predict_type='raw',
@@ -588,7 +596,7 @@
return (clf, mode)
-def save_training_data(X, y, groups, file):
+def save_training_data(X, y, groups, coords, file):
"""
Saves any extracted training data to a csv file
@@ -597,6 +605,7 @@
X: Numpy array containing predictor values
y: Numpy array containing labels
groups: Numpy array of group labels
+ coords: Numpy array containing xy coordinates of samples
file: Path to a csv file to save data to
"""
@@ -605,7 +614,7 @@
groups = np.empty((y.shape[0]))
groups[:] = np.nan
- training_data = np.column_stack([X, y, groups])
+ training_data = np.column_stack([coords, X, y, groups])
np.savetxt(file, training_data, delimiter=',')
@@ -622,6 +631,7 @@
X: Numpy array containing predictor values
y: Numpy array containing labels
groups: Numpy array of group labels, or None
+ coords: Numpy array containing x,y coordinates of samples
"""
training_data = np.loadtxt(file, delimiter=',')
@@ -636,10 +646,11 @@
groups = None
# fetch X and y
- X = training_data[:, 0:last_Xcol]
+ coords = training_data[:, 0:2]
+ X = training_data[:, 2:last_Xcol]
y = training_data[:, -2]
- return(X, y, groups)
+ return(X, y, groups, coords)
def extract(response, predictors, lowmem=False):
@@ -727,6 +738,8 @@
# convert indexes of training pixels from tuple to n*2 np array
is_train = np.array(is_train).T
+ for i in range(is_train.shape[0]):
+ is_train[i, :] = np.array(pixel2coor(tuple(is_train[i]), current))
# close the response map
roi_gr.close()
More information about the grass-commit mailing list