[GRASS-SVN] r70580 - grass-addons/grass7/raster/r.learn.ml

svn_grass at osgeo.org svn_grass at osgeo.org
Wed Feb 15 06:34:25 PST 2017


Author: spawley
Date: 2017-02-15 06:34:25 -0800 (Wed, 15 Feb 2017)
New Revision: 70580

Modified:
   grass-addons/grass7/raster/r.learn.ml/raster_learning.py
Log:
r.learn.ml: bug fix for one-hot encoding

Modified: grass-addons/grass7/raster/r.learn.ml/raster_learning.py
===================================================================
--- grass-addons/grass7/raster/r.learn.ml/raster_learning.py	2017-02-15 11:56:53 UTC (rev 70579)
+++ grass-addons/grass7/raster/r.learn.ml/raster_learning.py	2017-02-15 14:34:25 UTC (rev 70580)
@@ -93,16 +93,12 @@
         self.categorical_var = categorical_var
         self.category_values = None
 
-        if self.categorical_var:
-            self.__onehotencode()
-
         # for preprocessing of data
         self.sampling = sampling
         self.preprocessing = preprocessing
 
         # for cross-validation scores
         self.scores = None
-        self.scores_cm = None
         self.fimp = None
         self.mean_tpr = None
         self.mean_fpr = None
@@ -148,15 +144,14 @@
 
         # Balance classes prior to fitting
         if self.sampling is not None:
-            # balance samples
-            y_original = deepcopy(y)
-            X, y = self.sampling.fit_sample(X, y)
+            if groups is None:
+                X, y = self.sampling.fit_sample(X, y)
+            else:
+                X = np.hstack((X, groups.reshape(-1, 1)))
+                X, y = self.sampling.fit_sample(X, y)
+                groups = X[:, -1]
+                X = X[:, :-1]
 
-            # balance groups if present
-            if groups is not None:
-                groups, _ = self.sampling.fit_sample(
-                        groups.reshape(-1, 1), y_original)
-
         if self.preprocessing is not None:
             X = self.__preprocessor(X)
 
@@ -387,16 +382,18 @@
             if groups is not None:
                 groups_train = groups[train_indices]
 
-            # balance the fold
+            # balance the training fold
             if self.sampling is not None:
-                y_train_original = deepcopy(y_train)
-                X_train, y_train = self.sampling.fit_sample(
-                    X_train, y_train)
+                if groups is None:
+                    X_train, y_train = self.sampling.fit_sample(
+                            X_train, y_train)
+                else:
+                    X_train = np.hstack((X_train, groups_train.reshape(-1, 1)))
+                    X_train, y_train = self.sampling.fit_sample(
+                            X_train, y_train)
+                    groups_train = X_train[:, -1]
+                    X_train = X_train[:, :-1]
 
-                if groups is not None:
-                    groups_train, _ = self.sampling.fit_sample(
-                        groups_train.reshape(-1, 1), y_train_original)
-
             else:
                 # also get indices of groups for the training partition
                 if groups is not None:



More information about the grass-commit mailing list