[GRASS-SVN] r69983 - grass-addons/grass7/raster/r.randomforest

Fri Dec 2 20:41:46 PST 2016

Author: spawley
Date: 2016-12-02 20:41:46 -0800 (Fri, 02 Dec 2016)
New Revision: 69983

Modified:
   grass-addons/grass7/raster/r.randomforest/ml_utils.py
   grass-addons/grass7/raster/r.randomforest/r.randomforest.py
Log:
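Bug fix to r.randomforest: split the group-label column off the loaded training data, run k-means clustering on the sample coordinates inside the sampling branch, and write the first column of the feature importances to the CSV output.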

Modified: grass-addons/grass7/raster/r.randomforest/ml_utils.py
===================================================================

--- grass-addons/grass7/raster/r.randomforest/ml_utils.py	2016-12-02 19:02:29 UTC (rev 69982)
+++ grass-addons/grass7/raster/r.randomforest/ml_utils.py	2016-12-03 04:41:46 UTC (rev 69983)
@@ -1,4 +1,3 @@
-import os
 import numpy as np
 import grass.script as grass
 import tempfile
@@ -65,21 +64,20 @@
     """
 
     training_data = np.loadtxt(file, delimiter=',')
+    n_features = training_data.shape[1]-1
 
     # check to see if last column contains group labels or nans
-    lastcol = training_data[:, training_data.shape[1]-1]
+    groups = training_data[:, -1]
+    training_data = training_data[:, 0:n_features]
 
-    if np.isnan(lastcol).all() is True:
-        n_features = training_data.shape[1]-1
-        groups = lastcol
-    else:
-        n_features = training_data.shape[1]
+    if np.isnan(groups).all() is True:
+        # if all nans then ignore last column
         groups = None
 
-    # retreave X and y
+    # fetch X and y
     X = training_data[:, 0:n_features-1]
-    y = training_data[:, n_features-1]
-
+    y = training_data[:, -1]
+    
     return(X, y, groups)
 
 
@@ -479,6 +477,13 @@
                 X, y, sample_coords = sample_predictors(
                     response=roi, predictors=maplist, shuffle_data=True,
                     lowmem=lowmem, random_state=random_state)
+                
+                # perform kmeans clustering on point coordinates
+                if cv > 1 and cvtype == 'kmeans':
+                    clusters = KMeans(
+                        n_clusters=cv, random_state=random_state, n_jobs=-1)
+                    clusters.fit(sample_coords)
+                    Id = clusters.labels_
 
             if save_training != '':
                 save_training_data(X, y, Id, save_training)
@@ -486,11 +491,4 @@
             if model_save != '':
                 save_training_data(X, y, Id, model_save + ".csv")
 
-    # perform kmeans clustering on point coordinates
-    if cv > 1 and cvtype == 'kmeans':
-        clusters = KMeans(
-            n_clusters=cv, random_state=random_state, n_jobs=-1)
-        clusters.fit(sample_coords)
-        Id = clusters.labels_
-
     return (X, y, Id, clf)

Modified: grass-addons/grass7/raster/r.randomforest/r.randomforest.py
===================================================================
--- grass-addons/grass7/raster/r.randomforest/r.randomforest.py	2016-12-02 19:02:29 UTC (rev 69982)
+++ grass-addons/grass7/raster/r.randomforest/r.randomforest.py	2016-12-03 04:41:46 UTC (rev 69983)
@@ -367,8 +367,7 @@
     X, y, Id, clf = sample_training_data(roi, maplist, cv, cvtype, model_load,
                                          model_save, load_training,
                                          save_training, lowmem, random_state)
-                            
-                                         
+
     # determine the number of class labels using np.unique
     labels = np.unique(y)
 
@@ -489,7 +488,7 @@
 
             if fimp_file != '':
                 fimp_output = pd.DataFrame(
-                    {'grass raster': maplist, 'importance': clfimp})
+                    {'grass raster': maplist, 'importance': clfimp[:, 0]})
                 fimp_output.to_csv(
                     path_or_buf=fimp_file,
                     header=['grass raster', 'importance'])