[GRASS-SVN] r58638 - in grass-addons/grass7/vector: . v.class.ml

svn_grass at osgeo.org svn_grass at osgeo.org
Tue Jan 7 08:18:40 PST 2014


Author: zarch
Date: 2014-01-07 08:18:40 -0800 (Tue, 07 Jan 2014)
New Revision: 58638

Added:
   grass-addons/grass7/vector/v.class.ml/
   grass-addons/grass7/vector/v.class.ml/Makefile
   grass-addons/grass7/vector/v.class.ml/ml_classifiers.py
   grass-addons/grass7/vector/v.class.ml/ml_functions.py
   grass-addons/grass7/vector/v.class.ml/npy2table.py
   grass-addons/grass7/vector/v.class.ml/sqlite2npy.py
   grass-addons/grass7/vector/v.class.ml/test_rpc.py
   grass-addons/grass7/vector/v.class.ml/training_extraction.py
   grass-addons/grass7/vector/v.class.ml/v.class.ml.html
   grass-addons/grass7/vector/v.class.ml/v.class.ml.py
Log:
Add a new module for machine learning classification of vector maps

Added: grass-addons/grass7/vector/v.class.ml/Makefile
===================================================================
--- grass-addons/grass7/vector/v.class.ml/Makefile	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/Makefile	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,10 @@
+# Build description of the v.class.ml addon script.
+MODULE_TOPDIR = ../..
+
+# name of the module built by this Makefile
+PGM = v.class.ml
+
+# helper Python modules shipped with the script, listed without the
+# `.py` suffix (test_rpc is not part of the list)
+ETCFILES = training_extraction ml_classifiers ml_functions sqlite2npy npy2table
+
+include $(MODULE_TOPDIR)/include/Make/Script.make
+include $(MODULE_TOPDIR)/include/Make/Python.make
+
+default: script

Added: grass-addons/grass7/vector/v.class.ml/ml_classifiers.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/ml_classifiers.py	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/ml_classifiers.py	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,393 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov  6 15:08:38 2013
+
+@author: pietro
+"""
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+from gettext import lgettext as _
+
+from sklearn.linear_model import SGDClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neighbors import (NearestNeighbors,
+                               KNeighborsClassifier,
+                               RadiusNeighborsClassifier,
+                               NearestCentroid)
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.naive_bayes import GaussianNB
+
+
+from grass.pygrass.messages import Messenger
+
+MSGR = Messenger()
+
+# mlpy is optional: when the import fails `mlpy` is set to None and only
+# the sklearn classifiers are available.
+try:
+    import mlpy
+except ImportError:
+    # adjacent string literals are joined as they are, therefore every
+    # fragment carries its own separator to keep the sentences apart
+    MSGR.warning(_("MLPY not found in the current python path, "
+                   "check that is installed or set the python path. "
+                   "Only `sklearn` will be used."))
+    mlpy = None
+
+
+# (column name, SQL type) pairs describing a result table: the category,
+# the class and the color of each feature.
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+        ('class', 'INTEGER'),
+        ('color', 'VARCHAR(11)'), ]
+
+
+# Unsupervised
+#nbrs = NearestNeighbors(n_neighbors=8,
+#                        algorithm='ball_tree').fit(data)
+#distances, indices = nbrs.kneighbors(data)
+
+
+def _clsf(name, classifier, **kwargs):
+    """Return one record of the CLASSIFIERS list.
+
+    A record is a dictionary with the unique `name` of the configuration,
+    the `classifier` class and the `kwargs` used to instantiate the class.
+    """
+    return {'name': name, 'classifier': classifier, 'kwargs': kwargs}
+
+
+def _sgd_classifiers():
+    """Stochastic Gradient Descent (SGD): every loss for each penalty."""
+    losses = [('hinge', 'hinge'), ('huber', 'modified_huber'),
+              ('log', 'log')]
+    penalties = [('l2', 'l2'), ('l1', 'l1'), ('elastic', 'elasticnet')]
+    return [_clsf('sgd_%s_%s' % (ltag, ptag), SGDClassifier,
+                  loss=loss, penalty=penalty)
+            for ptag, penalty in penalties
+            for ltag, loss in losses]
+
+
+def _knn_classifiers():
+    """K-NN, radius neighbors and nearest centroid configurations."""
+    weights = ('uniform', 'distance')
+    radii = [('0p5', 0.5), ('1', 1.), ('1p5', 1.5),
+             ('2', 2.), ('2p5', 2.5), ('5', 5.)]
+    # metrics used for the centroids; further metric names noted by the
+    # author and not explored here: braycurtis, canberra, chebyshev,
+    # correlation, cosine, dice, hamming, jaccard, kulsinski, mahalanobis,
+    # matching, minkowski, rogerstanimoto, russellrao, seuclidean,
+    # sokalmichener, sokalsneath, sqeuclidean, yule
+    metrics = ('euclidean', 'l2', 'l1', 'manhattan', 'cityblock')
+    shrinks = [('none', None), ('0p5', 0.5), ('1', 1.0),
+               ('1p5', 1.5), ('2', 2.0)]
+
+    res = [_clsf('knn%d_%s' % (k, wgt), KNeighborsClassifier,
+                 n_neighbors=k, weights=wgt)
+           for wgt in weights
+           for k in (2, 4, 8, 16)]
+    res += [_clsf('knn_radius_%s_%s' % (rtag, wgt),
+                  RadiusNeighborsClassifier, radius=radius, weights=wgt)
+            for wgt in weights
+            for rtag, radius in radii]
+    # the keyword must be exactly `shrink_threshold`: a trailing blank in
+    # the name makes the classifier constructor fail
+    res += [_clsf('knn_centroid_%s_%s' % (metric, stag), NearestCentroid,
+                  metric=metric, shrink_threshold=shrink)
+            for metric in metrics
+            for stag, shrink in shrinks]
+    return res
+
+
+def _tree_classifiers():
+    """Decision tree and random forest configurations.
+
+    For each classifier and criterion the first record spells out the
+    basic parameters, the others change the number of features evaluated
+    at each split (`max_features`), the parameter that accepts 'sqrt',
+    'log2' or a fraction; `max_depth` only accepts an integer or None.
+    """
+    features = [('sqrt', 'sqrt'), ('log2', 'log2'),
+                ('0p25', 0.25), ('0p50', 0.5), ('0p75', 0.75)]
+    res = []
+    for prefix, klass in (('d_tree', DecisionTreeClassifier),
+                          ('rand_tree', RandomForestClassifier)):
+        for criterion in ('gini', 'entropy'):
+            base = '%s_%s' % (prefix, criterion)
+            # `min_density` is left to the library default (it was passed
+            # as None, its default value, and is a deprecated parameter)
+            kwargs = dict(criterion=criterion, max_depth=None,
+                          min_samples_split=2, min_samples_leaf=1,
+                          max_features=None, random_state=None)
+            if klass is DecisionTreeClassifier:
+                # `splitter` is a parameter of the single tree only
+                kwargs['splitter'] = 'best'
+            res.append(_clsf(base, klass, **kwargs))
+            res.extend([_clsf('%s_%s' % (base, ftag), klass,
+                              criterion=criterion, max_features=feat)
+                        for ftag, feat in features])
+    return res
+
+
+# List of the available classifier configurations; the position in the
+# list is the index of the configuration, therefore the order matters.
+CLASSIFIERS = (_sgd_classifiers() +
+               _knn_classifiers() +
+               _tree_classifiers() +
+               # Gaussian
+               [{'name': 'gaussianNB', 'classifier': GaussianNB}, ])
+
+
+class MLPYWrapper(object):
+    """Give to an mlpy classifier the `fit`/`predict` interface.
+
+    The wrapper is created around a classifier class. Calling the wrapper
+    instantiates the class and returns a new wrapper bound to the
+    instance, whose `fit` and `predict` attributes are the `learn` and
+    `pred` methods of the instance.
+    """
+    def __init__(self, cls):
+        self.cls = cls
+        self.mlcls = None
+        self.wrap = dict(fit='learn', predict='pred')
+
+    def __getattr__(self, name):
+        """Map the wrapped names to the instance, raise AttributeError
+        for any other missing attribute."""
+        # read __dict__ directly: `self.mlcls` would call __getattr__
+        # again, endlessly, when the attribute is not set yet
+        mlcls = self.__dict__.get('mlcls')
+        wrap = self.__dict__.get('wrap', {})
+        if mlcls is not None and name in wrap:
+            return getattr(mlcls, wrap[name])
+        raise AttributeError("%r object has no attribute %r" %
+                             (type(self).__name__, name))
+
+    def __call__(self, *args, **kwargs):
+        """Instantiate the wrapped class and return the bound wrapper."""
+        # a new wrapper for each instance, so that an instance already
+        # trained is not replaced by the next instantiation
+        bound = MLPYWrapper(self.cls)
+        bound.mlcls = self.cls(*args, **kwargs)
+        # the last instance is kept on the factory too, as done so far
+        self.mlcls = bound.mlcls
+        return bound
+
+
+if mlpy is not None:
+    def _mlpy_classifiers():
+        """Return the list of the mlpy classifier configurations.
+
+        The names are unique: they are used to name the outputs of each
+        configuration.
+        """
+        def new(name, classifier, **kwargs):
+            """Return a record with the classifier class wrapped."""
+            rec = {'name': name, 'classifier': MLPYWrapper(classifier)}
+            if kwargs:
+                rec['kwargs'] = kwargs
+            return rec
+
+        #
+        # Linear Discriminant Analysis Classifier (LDAC)
+        #
+        res = [new('mlpy_LDAC_1', mlpy.LDAC), ]
+        #
+        # Elastic Net Classifier
+        #
+        res += [new('mlpy_ElasticNetC_%s_%s' % (lmb, eps),
+                    mlpy.ElasticNetC,
+                    lmb=lmb, eps=eps, supp=True, tol=0.01)
+                for lmb in (0.1, 0.01, 0.001)
+                for eps in (0.1, 0.01, 0.001)]
+        #
+        # Diagonal Linear Discriminant Analysis (DLDA)
+        #
+        res += [new('mlpy_DLDA_%s' % delta, mlpy.DLDA, delta=delta)
+                for delta in (0.01, 0.05, 0.1, 0.5)]
+        #
+        # mlpy.Golub
+        #
+        res += [new('mlpy_Golub', mlpy.Golub), ]
+        #
+        # LibLinear
+        #
+        solvers = ('l2r_lr', 'l2r_l2loss_svc', 'l2r_l1loss_svc_dual',
+                   'mcsvm_cs', 'l1r_l2loss_svc', 'l1r_lr', 'l2r_lr_dual')
+        res += [new('mlpy_liblin_%s' % solver, mlpy.LibLinear,
+                    solver_type=solver, C=1, eps=0.01)
+                for solver in solvers]
+        #
+        # K-NN
+        #
+        # k=16 is named mlpy_KNN_16 (it shared the name of k=8)
+        res += [new('mlpy_KNN_%d' % k, mlpy.KNN, k=k)
+                for k in (1, 2, 3, 4, 8, 16)]
+        #
+        # Tree
+        #
+        # stumps=1, minsize=0 is named mlpy_tree_1_0 like the others
+        res += [new('mlpy_tree_%d_%d' % (stumps, minsize), mlpy.ClassTree,
+                    stumps=stumps, minsize=minsize)
+                for stumps in (0, 1)
+                for minsize in (0, 5, 10, 20, 40)]
+        #
+        # mlpy.MaximumLikelihoodC
+        #
+        #res += [new('mlpy_maximumlike', mlpy.MaximumLikelihoodC), ]
+        return res
+
+    MLPY_CLS = _mlpy_classifiers()
+    # add MLPY
+    CLASSIFIERS.extend(MLPY_CLS)

Added: grass-addons/grass7/vector/v.class.ml/ml_functions.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/ml_functions.py	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/ml_functions.py	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,201 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov  6 15:08:38 2013
+
+@author: pietro
+"""
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+import time
+import random as rnd
+from gettext import lgettext as _
+import sys
+
+import numpy as np
+import pylab as pl
+
+
+from sklearn.metrics import accuracy_score
+from sklearn.cross_validation import StratifiedKFold
+from sklearn.grid_search import GridSearchCV
+from sklearn.svm import SVC
+
+from grass.pygrass.messages import Messenger
+
+# messenger used to report the progress of the optimization
+MSGR = Messenger()
+
+
+# (column name, SQL type) pairs describing a result table: the category,
+# the class and the color of each feature.
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+        ('class', 'INTEGER'),
+        ('color', 'VARCHAR(11)'), ]
+
+
+def print_cols(clss, sep=';', save=sys.stdout):
+    """Print the header of the report written by `print_test`.
+
+    The header is made by the fixed columns, one column for each distinct
+    value of `clss` (sorted) and a last column with the mean. The names
+    are joined with `sep` and written to the `save` file object.
+    """
+    header = ['ml_index', 'ml_name', 'fit_time', 'prediction_time',
+              'tot_accuracy']
+    header.extend([str(cls) for cls in sorted(set(clss))])
+    header.append('mean')
+    print(sep.join(header), file=save)
+
+
+def print_test(cls, timefmt='%.4fs', accfmt='%.5f', sep=';', save=sys.stdout):
+    """Print one row of the report for the classifier record `cls`.
+
+    The row contains: the index ('None' when the record has no 'index'),
+    the name, the time spent to fit and to predict, the total accuracy,
+    the accuracy of each class and the mean of the class accuracies.
+    Times are formatted with `timefmt`, accuracies with `accfmt`; the
+    fields are joined with `sep` and written to the `save` file object.
+    """
+    index = str(cls['index']) if 'index' in cls else 'None'
+    name = cls['name']
+    fit_time = timefmt % (cls['fit_stop'] - cls['fit_start'])
+    pred_time = timefmt % (cls['pred_stop'] - cls['pred_start'])
+    tot_acc = accfmt % cls['t_acc']
+    cls_acc = sep.join([accfmt % acc for acc in cls['c_acc']])
+    mean_acc = accfmt % cls['c_acc_mean']
+    row = [index, name, fit_time, pred_time, tot_acc, cls_acc, mean_acc]
+    print(sep.join(row), file=save)
+
+
+def accuracy(sol, cls=None, data=None, clss=None, pred=None):
+    """Compute the total and the per-class accuracy of a prediction.
+
+    `sol` are the expected classes. The prediction is computed from
+    `data` when the record `cls` contains a trained classifier under the
+    'cls' key (the prediction time is stored under 'pred_start' and
+    'pred_stop'), otherwise `pred` is used. `clss` are the classes to
+    evaluate, by default the sorted distinct values of `sol`.
+
+    `sol` and the prediction are selected with boolean masks, therefore
+    they are expected to be numpy arrays.
+
+    Return the record with the keys 't_acc' (total accuracy), 'c_acc'
+    (array with the accuracy of each class) and 'c_acc_mean' added; a
+    new dictionary is returned when `cls` is not given.
+
+    Raise a ValueError if there is neither a classifier nor `pred`.
+    """
+    cls = dict() if cls is None else cls
+    # test the length: the truth value of a numpy array of classes is
+    # ambiguous and raises an error
+    if clss is None or len(clss) == 0:
+        clsses = sorted(set(sol))
+    else:
+        clsses = clss
+    if 'cls' in cls:
+        cls['pred_start'] = time.time()
+        pred = cls['cls'].predict(data)
+        cls['pred_stop'] = time.time()
+    if pred is None:
+        raise ValueError("A trained classifier in cls['cls'] or a "
+                         "prediction (pred) is required.")
+
+    cls['t_acc'] = accuracy_score(sol, pred, normalize=True)
+    c_acc = []
+    for c in clsses:
+        indx = sol == c
+        c_acc.append(accuracy_score(sol[indx], pred[indx],
+                                    normalize=True))
+    cls['c_acc'] = np.array(c_acc)
+    cls['c_acc_mean'] = cls['c_acc'].mean()
+    return cls
+
+
+def test_classifier(cls, Xt, Yt, Xd, Yd, clss, save=sys.stdout,
+                    verbose=True):
+    """Train the classifier described by the record `cls` and test it.
+
+    The class under the 'classifier' key is instantiated with the
+    optional 'kwargs' and trained on `Xt`, `Yt`; the instance is stored
+    under 'cls', the training time under 'fit_start' and 'fit_stop' and
+    the parameters of the instance under 'params' (None when the
+    instance has no `get_params`). The accuracy is then computed on
+    `Xd`, `Yd` for the classes `clss` and, if `verbose`, the row of the
+    report is printed to `save`.
+    """
+    kwargs = cls.get('kwargs', {})
+    model = cls['classifier'](**kwargs)
+    cls['cls'] = model
+    cls['fit_start'] = time.time()
+    model.fit(Xt, Yt)
+    cls['fit_stop'] = time.time()
+    try:
+        params = model.get_params()
+    except AttributeError:
+        params = None
+    cls['params'] = params
+    accuracy(Yd, cls, Xd, clss)
+    if verbose:
+        print_test(cls, save=save)
+
+
+def run_classifier(cls, Xt, Yt, Xd, Yd, clss, data,
+                   save=sys.stdout):
+    """Train and test the classifier `cls`, then classify `data`.
+
+    The prediction is stored in the record under 'predict' and saved in
+    the current directory as `<name>.npy`; the row of the report, with
+    the time spent to classify `data`, is printed to `save`.
+    """
+    test_classifier(cls, Xt, Yt, Xd, Yd, clss, verbose=False)
+    model = cls['cls']
+    cls['pred_start'] = time.time()
+    cls['predict'] = model.predict(data)
+    cls['pred_stop'] = time.time()
+    print_test(cls, save=save)
+    outfile = cls['name'] + '.npy'
+    np.save(outfile, cls['predict'])
+
+
+def reduce_cls(Yt, subs):
+    """Return a copy of `Yt` with the classes replaced following `subs`.
+
+    `subs` maps each class to its substitute. The substitutions are
+    applied one after the other on the copy, `Yt` is not modified.
+    """
+    reduced = np.copy(Yt)
+    for old, new in subs.items():
+        reduced[reduced == old] = new
+    return reduced
+
+
+def balance_cls(data, num):
+    """Return `num` rows of `data` picked randomly, with repetitions."""
+    picked = np.random.randint(0, len(data), size=num)
+    return data[picked]
+
+
+def balance(tdata, tclss, num=None):
+    """Return a training set with the same number of rows for each class.
+
+    For each class of `tclss`, `num` rows of `tdata` are picked randomly;
+    by default `num` is the size of the smallest class. The rows are
+    shuffled and returned as two arrays: the data and the classes.
+    """
+    labels = sorted(set(tclss))
+    if not num:
+        num = min([len(tclss[tclss == lab]) for lab in labels])
+    pairs = []
+    for lab in labels:
+        for row in balance_cls(tdata[tclss == lab], num):
+            pairs.append((lab, row))
+    rnd.shuffle(pairs)
+    bclss = np.array([pair[0] for pair in pairs], dtype=int)
+    bdata = np.array([pair[1] for pair in pairs])
+    return bdata, bclss
+
+
+def optimize_training(cls, tdata, tclss,
+                      scaler=None, num=None, maxiterations=1000):
+    """Look for the balanced training set that gives the best accuracy.
+
+    At each iteration a balanced subset of `tdata`, `tclss` is extracted
+    (see `balance`, `num` is the number of rows for each class), the
+    classifier `cls` is trained on the subset and tested on the whole
+    training set. If a `scaler` is given, it is fitted on the subset and
+    applied to both the subset and the whole set.
+
+    Return the best record, i.e. the one with the highest mean of the
+    class accuracies, with the subset (data and classes, not scaled)
+    used to train it. The subset is (None, None) when no iteration gives
+    a mean accuracy greater than 0.
+    """
+    best = cls.copy()
+    best['c_acc_mean'] = 0
+    # defined from the start: they were unbound when no iteration improved
+    # the initial accuracy
+    bXt, bYt = None, None
+    means = []
+    for i in range(maxiterations):  # TODO: use multicore
+        MSGR.percent(i, maxiterations, 1)
+        Xt, Yt = balance(tdata, tclss, num)
+        if scaler:
+            scaler.fit(Xt, Yt)
+            sXt = scaler.transform(Xt)
+            stdata = scaler.transform(tdata)
+        else:
+            sXt, stdata = Xt, tdata
+        test_classifier(cls, sXt, Yt, stdata, tclss, None, verbose=False)
+        if cls['c_acc_mean'] > best['c_acc_mean']:
+            print("%f > %f" % (cls['c_acc_mean'], best['c_acc_mean']))
+            best = cls.copy()
+            bXt, bYt = Xt, Yt
+        means.append(cls['c_acc_mean'])
+    print("best accuracy: %f, number of iterations: %d" % (best['c_acc_mean'],
+                                                           maxiterations))
+    if means:
+        # the statistics are not defined without iterations
+        means = np.array(means)
+        print("mean of means: %f" % means.mean())
+        print("min of means: %f" % means.min())
+        print("max of means: %f" % means.max())
+        print("std of means: %f" % means.std())
+    return best, bXt, bYt
+
+
+def explorer_clsfiers(clsses, Xt, Yt, Xd, Yd, clss,
+                      indexes=None, csv=sys.stdout):
+    """Train and test all the classifiers of the list `clsses`.
+
+    The header and one row for each classifier are written to the `csv`
+    file object. The index of a classifier is taken from `indexes`, if
+    given, otherwise is its position in the list. A classifier that
+    fails does not stop the exploration: the failures are reported, with
+    the error, at the end.
+    """
+    errors = []
+    gen = zip(indexes, clsses) if indexes else enumerate(clsses)
+    print_cols(Yt, sep=';', save=csv)
+    for ind, cls in gen:
+        print(cls['name'], ind)
+        cls['index'] = ind
+        try:
+            test_classifier(cls, Xt, Yt, Xd, Yd, clss, csv)
+        except Exception as exc:
+            # only the errors of the classifier are skipped: the user can
+            # still interrupt the exploration (KeyboardInterrupt)
+            errors.append((cls, exc))
+    for err, exc in errors:
+        print('Error in: %s (%r)' % (err['name'], exc))
+
+
+def plot_grid(grid, save=''):
+    """Plot, for each kernel, the scores of a grid search over C and gamma.
+
+    `grid` is a fitted grid search whose `param_grid` has the keys 'C',
+    'gamma' and 'kernel'. For each kernel a heat map of the scores is
+    drawn, with C on the rows and gamma on the columns, the best score
+    is marked with a red dot and reported in the title. If `save` is
+    given the figure is saved to that path before being shown.
+    """
+    C = grid.param_grid['C']
+    gamma = grid.param_grid['gamma']
+
+    for kernel in grid.param_grid['kernel']:
+        scores = [x[1] for x in grid.grid_scores_ if x[0]['kernel'] == kernel]
+        # NOTE(review): assumes that the scores are ordered with gamma
+        # changing faster than C, verify against the grid search
+        scores = np.array(scores).reshape(len(C), len(gamma))
+        # draw heatmap of accuracy as a function of gamma and C
+        pl.figure(figsize=(8, 6))
+        pl.subplots_adjust(left=0.05, right=0.95, bottom=0.15, top=0.95)
+        pl.imshow(scores, interpolation='nearest', cmap=pl.cm.spectral)
+        pl.xlabel(r'$\gamma$')
+        pl.ylabel('C')
+        pl.colorbar()
+        pl.xticks(np.arange(len(gamma)), gamma, rotation=45)
+        pl.yticks(np.arange(len(C)), C)
+        ic, igamma = np.unravel_index(np.argmax(scores), scores.shape)
+        pl.plot(igamma, ic, 'r.')
+        # rows are C and columns are gamma
+        best = scores[ic, igamma]
+        titl = r"$best:\, %0.4f, \,C:\, %g, \,\gamma: \,%g$" % (best,
+                                                             C[ic],
+                                                             gamma[igamma])
+        pl.title(titl)
+        if save:
+            # NOTE(review): the same path is used for all the kernels
+            pl.savefig(save, dpi=600, transparent=True, bbox_inches='tight')
+        pl.show()
+
+
+def explore_SVC(Xt, Yt, n_folds=3, n_jobs=1, **kwargs):
+    """Search the best parameters of a SVC with a cross-validated grid.
+
+    The keyword arguments are the parameter grid to explore; the search
+    uses a stratified k-fold with `n_folds` folds and `n_jobs` jobs.
+    Print the best classifier and return the fitted grid search.
+    """
+    folds = StratifiedKFold(y=Yt, n_folds=n_folds)
+    search = GridSearchCV(SVC(), param_grid=kwargs, cv=folds, n_jobs=n_jobs)
+    search.fit(Xt, Yt)
+    print("The best classifier is: ", search.best_estimator_)
+    return search

Added: grass-addons/grass7/vector/v.class.ml/npy2table.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/npy2table.py	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/npy2table.py	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Nov 10 17:00:13 2013
+
+@author: pietro
+"""
+from __future__ import print_function, division
+
+import pickle
+import numpy as np
+from grass.pygrass.vector import Vector
+from grass.pygrass.vector.table import Link, Table
+
+# (column name, SQL type) pairs of a result table: the category, the class
+# and the color of each feature; the color column is used only with a
+# training table (see export_results).
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+        ('class', 'INTEGER'),
+        ('color', 'VARCHAR(11)'), ]
+
+
+def export2sqlite(table, cats, clsses, training=None):
+    """Insert the categories and the classes in `table` and commit.
+
+    If the `training` table is given a third column is inserted with the
+    color read from the training table for each class; the features
+    without a color get an empty string.
+    """
+    cur = table.conn.cursor()
+    if training:
+        # an array of objects keeps the color strings whole: a numpy
+        # string type without a length clips them to one character
+        colors = np.empty(clsses.shape, dtype=object)
+        colors.fill('')
+        # NOTE(review): the key read from the table is `cat` but it is
+        # compared with the classes, confirm that the two coincide in
+        # the training table
+        cur = training.execute('SELECT cat, color FROM %s;' % training.name,
+                               cursor=cur)
+        trndict = dict(cur.fetchall())
+        for key in trndict:
+            colors[clsses == key] = trndict[key]
+        rows = zip(cats, clsses, colors)
+    else:
+        rows = zip(cats, clsses)
+    print("Insert data")
+    table.insert(rows, cursor=cur, many=True)
+    cur.close()
+    table.conn.commit()
+
+
+def export2onesqlite(table, cats, *clsses):
+    """Insert in `table` one row for each category, with a column for
+    each array of classes given in `clsses`, and commit."""
+    cursor = table.conn.cursor()
+    print("Insert data")
+    rows = zip(cats, *clsses)
+    table.insert(rows, cursor=cursor, many=True)
+    cursor.close()
+    table.conn.commit()
+
+
+def create_tab(vect, tab_name, cats, clsses, cols, training=None):
+    """Create the table `tab_name` with the classification of `vect`.
+
+    The table is created with the columns `cols` in the database of the
+    vector and filled with `cats` and `clsses` (see `export2sqlite`,
+    `training` is the name of the optional training table). A table with
+    the same name is dropped first; a new link is added to the vector
+    only when the table did not exist.
+    """
+    conn = vect.table.conn
+    cur = conn.cursor()
+    table = Table(tab_name, conn)
+    existed = table.exist()
+    if existed:
+        print("Table <%s> already exist, will be removed." % tab_name)
+        table.drop(cursor=cur)
+    print("Ceating a new table <%s>." % tab_name)
+    table.create(cols, cursor=cur)
+    trn_table = Table(training, conn) if training else None
+    export2sqlite(table, cats, clsses, trn_table)
+    cur.close()
+    if not existed:
+        layer = len(vect.dblinks) + 1
+        vect.dblinks.add(Link(layer=layer, name=tab_name, table=tab_name))
+
+
+def export_results(vect_name, results, cats, rlayer,
+                   training=None, cols=None, overwrite=False, pkl=None):
+    """Export the results of the classifiers to the vector `vect_name`.
+
+    `results` is the list of the classifier records, with the prediction
+    under the 'predict' key, and `cats` are the categories of the
+    features. If `cols` is given all the predictions are written in one
+    table, named `rlayer`, with the category column followed by `cols`;
+    otherwise one table is created for each classifier (see
+    `create_tab`), with the color column if `training`, the name of the
+    training table, is given. If `pkl` is given the results are also
+    dumped with pickle to that path.
+    """
+    if pkl:
+        # a pickle is a binary stream, the file is closed on errors too
+        with open(pkl, 'wb') as res:
+            pickle.dump(results, res)
+
+    # check if the link already exist
+    with Vector(vect_name, mode='r') as vct:
+        link = vct.dblinks.by_name(rlayer)
+        mode = 'r' if link else 'rw'
+
+    print("Opening vector <%s>" % vect_name)
+    with Vector(vect_name, mode=mode) as vect:
+        if cols:
+            # a new list: the list of the caller is not modified
+            tcols = [COLS[0], ] + list(cols)
+            tab = link.table() if link else Table(rlayer, vect.table.conn)
+            if tab.exist():
+                print("Table <%s> already exist, will be removed." % tab.name)
+                tab.drop(force=overwrite)
+            print("Ceating a new table <%s>." % rlayer)
+            tab.create(tcols)
+            export2onesqlite(tab, cats, *[cls['predict'] for cls in results])
+            if mode == 'rw':
+                nlyr = len(vect.dblinks) + 1
+                link = Link(nlyr, tab.name, tab.name)
+                vect.dblinks.add(link)
+                vect.build()
+        else:
+            for cls in results:
+                # create_tab wants the columns before the training table
+                create_tab(vect, cls['name'], cats, cls['predict'],
+                           COLS if training else COLS[:2], training)
+
+#create_tab(VECT, B1[:-4], cats, b1, TRAINING, COLS)

Added: grass-addons/grass7/vector/v.class.ml/sqlite2npy.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/sqlite2npy.py	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/sqlite2npy.py	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Nov  2 23:40:22 2013
+
+@author: pietro
+"""
+from __future__ import print_function, division
+
+import numpy as np
+from grass.pygrass.vector import VectorTopo
+
+FCATS = 'cats.npy'
+FDATA = 'data.npy'
+FINDX = 'indx.npy'
+FCLSS = 'training_classes.npy'
+FTDATA = 'training_data.npy'
+
+
+def cpdata(shape, iterator, msg=''):
+    """Fill a pre-allocated numpy array with the items of an iterator.
+
+    The array is allocated once with the requested ``shape`` and the items
+    yielded by ``iterator`` are written into it one after the other, so that
+    no intermediate python list is built. ``msg`` is printed before the copy
+    starts.
+    """
+    # number of rows, kept for a progress report that is not wired in yet
+    nrows = shape[0]
+    print(msg)
+    filled = np.zeros(shape)
+    position = 0
+    for record in iterator:
+        filled[position] = record
+        position += 1
+    return filled
+
+
+def save2npy(vect, l_data, l_trning,
+             fcats=FCATS, fdata=FDATA, findx=FINDX,
+             fclss=FCLSS, ftdata=FTDATA):
+    """Read data and training from a vector map and save them to npy files.
+
+    :param vect: name of the vector map
+    :param l_data: number (string of digits) or name of the data layer
+    :param l_trning: number (string of digits) or name of the training layer
+    :param fcats: file name for the categories
+    :param fdata: file name for the data
+    :param findx: file name for the boolean array of the training rows
+    :param fclss: file name for the training classes
+    :param ftdata: file name for the training data
+    :raises ValueError: if the training and the data tables have a different
+                        number of rows
+
+    Return 5 arrays:
+        - categories,
+        - data,
+        - a boolean array with the training,
+        - the training classes
+        - the training data
+    """
+    def layer_table(vct, layer):
+        """Return the table linked to a layer given by number or by name."""
+        link = (vct.dblinks.by_layer(layer) if layer.isdigit()
+                else vct.dblinks.by_name(layer))
+        return link.table()
+
+    with VectorTopo(vect, mode='r') as vct:
+        # instantiate the tables
+        data = layer_table(vct, l_data)
+        trng = layer_table(vct, l_trning)
+
+        # check the dimensions
+        n_trng, n_data = trng.n_rows(), data.n_rows()
+        if n_trng != n_data:
+            # a bare ``raise`` has no active exception to re-raise here:
+            # raise an explicit error that carries the message
+            raise ValueError('Different dimension between the training set'
+                             ' (%d) and the data set (%d)' % (n_trng, n_data))
+
+        # extract the training, rows without a class become NaN
+        slct_trn = "SELECT class FROM {tname};".format(tname=trng.name)
+        trn_all = cpdata((n_data, ), (np.nan if a[0] is None else a[0]
+                                      for a in trng.execute(slct_trn)),
+                         msg=slct_trn)
+        trn_indxs = ~np.isnan(trn_all)
+
+        # extract the data, all the columns except the key
+        data_cols = data.columns.names()
+        data_cols.remove(data.key)
+        cols = ', '.join(data_cols)
+        slct_data = "SELECT {cols} FROM {tname};".format(cols=cols,
+                                                         tname=data.name)
+        shape = (n_data, len(data_cols))
+        # use the function to be more memory efficient
+        dta = cpdata(shape, data.execute(slct_data), msg=slct_data)
+
+        # extract the cats
+        slct_cats = "SELECT {cat} FROM {tname};".format(cat=data.key,
+                                                        tname=data.name)
+        cats = cpdata((n_data, ), (c[0] for c in data.execute(slct_cats)),
+                      msg=slct_cats)
+
+        # training samples
+        trn_dta = dta[trn_indxs]
+        trn_ind = trn_all[trn_indxs]
+
+        # save
+        np.save(fcats, cats)
+        np.save(fdata, dta)
+        np.save(findx, trn_indxs)
+        np.save(fclss, trn_ind)
+        np.save(ftdata, trn_dta)
+        return cats, dta, trn_indxs, trn_ind, trn_dta
+
+
+def load_from_npy(fcats=FCATS, fdata=FDATA, findx=FINDX,
+                  fclss=FCLSS, ftdata=FTDATA):
+    """Load the arrays stored in the npy files.
+
+    The files are read in this order: categories, data, training index,
+    training classes and training data; the loaded arrays are returned as a
+    tuple in the same order.
+    """
+    sources = (fcats, fdata, findx, fclss, ftdata)
+    return tuple(np.load(source) for source in sources)

Added: grass-addons/grass7/vector/v.class.ml/test_rpc.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/test_rpc.py	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/test_rpc.py	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Nov 23 01:47:42 2013
+
+@author: pietro
+"""
+from grass.pygrass.vector import VectorTopo
+from grass.pygrass.vector.geometry import Point
+from grass.pygrass.function import get_mapset_vector
+
+
+def add_points(vname, vmapset='', *points):
+    """Write points to a vector map.
+
+    The map is opened in 'rw' mode when ``get_mapset_vector`` returns a
+    mapset for it and in 'w' mode otherwise.
+
+    :param vname: name of the vector map
+    :param vmapset: name of the mapset; when a coordinate pair is given in
+                    this position (as in the example below) it is treated as
+                    the first point and the mapset is left empty
+    :param points: coordinate pairs ``(x, y)`` of the points to write
+
+    >>> add_points('new', (1, 2), (2, 3), (3, 4))
+    """
+    if isinstance(vmapset, (tuple, list)):
+        # the mapset was omitted and the first point landed in its slot
+        points = (tuple(vmapset), ) + points
+        vmapset = ''
+    mapset = get_mapset_vector(vname, vmapset)
+    mode = 'rw' if mapset else 'w'
+    with VectorTopo(vname, mapset, mode=mode) as vct:
+        for x, y in points:
+            vct.write(Point(x, y))
+
+
+# NOTE(review): RPCServer is neither defined nor imported in this file, so
+# the next line raises NameError as it stands -- TODO import it.
+ciface = RPCServer()
+# run add_points through the RPC interface with three sample points
+check = ciface.call(function=add_points, args=('new', (1, 2), (2, 3), (3, 4)))
+

Added: grass-addons/grass7/vector/v.class.ml/training_extraction.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/training_extraction.py	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/training_extraction.py	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Nov  2 13:30:33 2013
+
+@author: pietro
+
+"""
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+from gettext import lgettext as _
+import numpy as np
+
+from grass.script.core import overwrite
+from grass.pygrass.vector import VectorTopo, Vector
+from grass.pygrass.vector.table import Link, Table
+from grass.pygrass.vector.geometry import Area, intersects
+from grass.pygrass.vector.basic import Bbox, BoxList
+from grass.pygrass.messages import Messenger
+
+
+# column definitions (name, SQL type) used by default when a new training
+# table is created
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+        ('class', 'INTEGER'), ]
+
+# parametrized SQL statement that sets the class of the row with a given
+# key; {tname} and {cat} are filled with the table name and its key column
+UPDATE = "UPDATE {tname} SET class=? WHERE {cat}=?;"
+
+
+def update_lines(line, alist, cur=None, sql=None):
+    """Collect the areas related to a line and optionally update the table.
+
+    An area is selected when its boundary intersects the line or when it
+    contains the first point of the line. For each selected area the pair
+    ``(line.cat, area.cat)`` is collected; if both ``cur`` and ``sql`` are
+    given the pairs are executed against the cursor. The list of pairs is
+    returned.
+    """
+    pairs = []
+    box = Bbox()
+    for area in alist:
+        # the same Bbox instance is handed back to be reused
+        box = area.bbox(box)
+        touches = intersects(area.boundary, line)
+        if not touches and not area.contain_pnt(line[0], box):
+            continue
+        pairs.append((line.cat, area.cat))
+    if cur is None or sql is None:
+        return pairs
+    cur.executemany(sql, pairs)
+    return pairs
+
+
+def update_areas(trn_area, seg_area, ids, cur=None, sql=None):
+    """Collect the segments related to a training area.
+
+    Each id in ``ids`` is read into ``seg_area``; the segment is selected
+    when its boundary intersects the boundary of the training area, when the
+    training area contains the first point of the segment boundary or when
+    the segment contains the first point of the training boundary. The pairs
+    ``(trn_area.cat, seg_area.cat)`` are executed against ``cur`` if both
+    ``cur`` and ``sql`` are given, and they are returned.
+    """
+    matches = []
+    trn_box = trn_area.bbox()
+    for seg_id in ids:
+        seg_area.id = seg_id
+        seg_area.read()
+        # the checks are tried in order and stop at the first that succeeds
+        related = intersects(seg_area.boundary, trn_area.boundary)
+        if not related:
+            related = trn_area.contain_pnt(seg_area.boundary[0], trn_box)
+        if not related:
+            related = seg_area.contain_pnt(trn_area.boundary[0])
+        if related:
+            matches.append((trn_area.cat, seg_area.cat))
+    if cur is None or sql is None:
+        return matches
+    cur.executemany(sql, matches)
+    return matches
+
+
+def find_lines(table, trn, seg, msgr):
+    """Update the table using the boundaries of the training map.
+
+    For every boundary in ``trn`` the areas of ``seg`` found by bounding box
+    are passed to ``update_lines``; the changes are committed at the end.
+    """
+    statement = UPDATE.format(tname=table.name, cat=table.key)
+    boxes = BoxList()
+    total = len(trn)
+    cursor = table.conn.cursor()
+    step = 0
+    for boundary in trn:
+        msgr.percent(step, total, 1)
+        candidates = seg.find['by_box'].areas(boundary.bbox(), boxes)
+        update_lines(boundary, candidates, cursor, statement)
+        step += 1
+    table.conn.commit()
+
+
+def find_area(table, trn_ids, trn_area, seg_area, n_areas, seg, msgr):
+    """Update the table using the training areas.
+
+    Every id in ``trn_ids`` is read into ``trn_area``; the ids of the areas
+    of ``seg`` found by the bounding box of its boundary are passed to
+    ``update_areas``. The changes are committed at the end.
+    """
+    cursor = table.conn.cursor()
+    msgr.message(_("Finding areas..."))
+    statement = UPDATE.format(tname=table.name, cat=table.key)
+    boxes = BoxList()
+    step = 0
+    for trn_id in trn_ids:
+        msgr.percent(step, n_areas, 1)
+        trn_area.id = trn_id
+        trn_area.read()
+        search_box = trn_area.boundary.bbox()
+        found = seg.find['by_box'].areas(search_box, boxes,
+                                         bboxlist_only=True)
+        update_areas(trn_area, seg_area, found.ids, cursor, statement)
+        step += 1
+    table.conn.commit()
+
+
+def make_new_table(vct, msgr, tname, cols=COLS, force=None):
+    """Check/remove/create a new table filled with the cats of the vector.
+
+    :param vct: open vector map, its table connection is used for the new
+                table and its key column provides the cats
+    :param msgr: messenger used to warn when an existing table is removed
+    :param tname: name of the table to create
+    :param cols: column definitions of the new table
+    :param force: passed to ``drop`` when the table already exists; if None
+                  the current value of ``overwrite()`` is used
+    :returns: the new table and a boolean that is False when a link of the
+              vector already points to a table with this name
+    """
+    if force is None:
+        # resolve the flag when the function is called, a default written as
+        # ``force=overwrite()`` is evaluated only once at import time
+        force = overwrite()
+    create_link = True
+    # make a new table
+    table = Table(tname, vct.table.conn)
+    if table.exist():
+        if any(table.name == l.table_name for l in vct.dblinks):
+            create_link = False
+        msg = _("Table <%s> already exist and will be removed.")
+        msgr.warning(msg % table.name)
+        table.drop(force=force)
+    table.create(cols)
+    # fill the new table with the segment cats, the class is left empty
+    slct = vct.table.filters.select(vct.table.key)
+    cur = vct.table.execute(slct.get_sql())
+    table.insert(((cat[0], None) for cat in cur), many=True)
+    table.conn.commit()
+    return table, create_link
+
+
+def check_balance(table, trntab, msgr):
+    """Report the number of segments assigned to each training class.
+
+    The rows of ``table`` are counted per class and one message per class is
+    sent to ``msgr``; the label of each class is looked up in the
+    ``cat``/``class`` pairs read from ``trntab``.
+    """
+    msgr.message(
+        _('Checking the balance between different training classes.'))
+    balance_sql = ("SELECT class, count(*) as num_of_segments "
+                   "FROM {tname} "
+                   "GROUP BY class ORDER BY num_of_segments;")
+    counts = table.execute(balance_sql.format(tname=table.name))
+    names_sql = "SELECT cat, class FROM {tname} ORDER BY cat;"
+    names = dict(trntab.execute(names_sql.format(tname=trntab.name)))
+    for class_id, amount in counts.fetchall():
+        label = names.get(class_id, str(class_id))
+        # an empty label is shown through its repr, an empty id as 0
+        label = label or repr(label)
+        class_num = class_id or 0
+        msgr.message("    - %s (%d): %d" % (label, class_num, amount))
+
+
+def extract_training_array(table):
+    """Return a numpy array with the class id or nan if not defined.
+
+    :param table: table with a ``class`` column, it must provide the
+                  attribute ``name`` and the method ``execute``
+    :returns: array with one item per row of the table
+    """
+    cur = table.execute("SELECT class FROM {tname}".format(tname=table.name))
+    # each row is a tuple: test the value of the class, not the row itself,
+    # and store the nan value (np.isnan is the function that tests for it)
+    return np.array([np.nan if c[0] is None else c[0] for c in cur])
+
+
+def get_layer_num_name(vect, tlayer):
+    """Return the number and the name of the training layer.
+
+    ``tlayer`` can be given as ``'number/name'``, as a number only or as a
+    name only. The missing part defaults to the number of links of the
+    vector plus one and to the vector name followed by ``'_training'``.
+    """
+    default_num = len(vect.dblinks) + 1
+    default_name = vect.name + '_training'
+    if '/' in tlayer:
+        number, name = tlayer.split('/')
+        return int(number), name
+    if tlayer.isdigit():
+        return int(tlayer), default_name
+    if tlayer:
+        return default_num, tlayer
+    return default_num, default_name
+
+
+def extract_training(vect, tvect, tlayer):
+    """Assign a class to the areas related to the training areas.
+
+    A new table is created for the vector ``vect`` and filled through
+    ``find_area`` using the areas of the training vector ``tvect``; the
+    balance between the classes is reported and, when needed, the table is
+    connected to the vector as a new layer.
+    """
+    msgr = Messenger()
+    with VectorTopo(tvect, mode='r') as trn, \
+            VectorTopo(vect, mode='r') as vct:
+        layer_num, layer_name = get_layer_num_name(vct, tlayer)
+        # instantiate the area objects
+        trn_area = Area(c_mapinfo=trn.c_mapinfo)
+        seg_area = Area(c_mapinfo=vct.c_mapinfo)
+        n_areas = trn.number_of('areas')
+        # check/remove/create a new table
+        table, create_link = make_new_table(vct, msgr, layer_name)
+        # find and save all the segments
+        trn_ids = trn.viter('areas', idonly=True)
+        find_area(table, trn_ids, trn_area, seg_area, n_areas, vct, msgr)
+        check_balance(table, trn.table, msgr)
+
+    if not create_link:
+        return
+    msgr.message(_("Connect the new table to the vector map..."))
+    with Vector(vect, mode='rw') as seg:
+        seg.dblinks.add(Link(layer_num, name=layer_name, table=table.name))
+        seg.build()

Added: grass-addons/grass7/vector/v.class.ml/v.class.ml.html
===================================================================
Added: grass-addons/grass7/vector/v.class.ml/v.class.ml.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/v.class.ml.py	                        (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/v.class.ml.py	2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,499 @@
+#!/usr/bin/env python
+# -- coding: utf-8 --
+#
+############################################################################
+#
+# MODULE:	    v.class.ml
+#
+# AUTHOR(S):   Pietro Zambelli (University of Trento)
+#
+# COPYRIGHT:	(C) 2013 by the GRASS Development Team
+#
+#		This program is free software under the GNU General Public
+#		License (>=v2). Read the file COPYING that comes with GRASS
+#		for details.
+#
+#############################################################################
+
+#%Module
+#%  description: Classify the features of a vector map using machine learning algorithms
+#%  keywords: machine learning
+#%  keywords: classification
+#%  overwrite: yes
+#%End
+#%option G_OPT_V_MAP
+#%  key: vector
+#%  description: Name of input vector map
+#%  required: yes
+#%end
+#%option G_OPT_V_MAP
+#%  key: vtraining
+#%  description: Name of training vector map
+#%  required: no
+#%end
+#%option
+#%  key: vlayer
+#%  type: string
+#%  multiple: no
+#%  description: layer name or number to use for the machine learning
+#%  required: no
+#%end
+#%option
+#%  key: tlayer
+#%  type: string
+#%  multiple: no
+#%  description: layer number/name for the training layer
+#%  required: no
+#%end
+#%option
+#%  key: rlayer
+#%  type: string
+#%  multiple: no
+#%  description: layer number/name for the ML results
+#%  required: no
+#%end
+#%option
+#%  key: npy_data
+#%  type: string
+#%  multiple: no
+#%  description: Data with statistics in npy format.
+#%  answer: data.npy
+#%  required: no
+#%end
+#%option
+#%  key: npy_cats
+#%  type: string
+#%  multiple: no
+#%  description: Numpy array with vector cats.
+#%  answer: cats.npy
+#%  required: no
+#%end
+#%option
+#%  key: npy_tdata
+#%  type: string
+#%  multiple: no
+#%  description: training npy file with training set, default: training_data.npy
+#%  answer: training_data.npy
+#%  required: no
+#%end
+#%option
+#%  key: npy_tclasses
+#%  type: string
+#%  multiple: no
+#%  description: training npy file with the classes, default: training_classes.npy
+#%  answer: training_classes.npy
+#%  required: no
+#%end
+#%option
+#%  key: npy_btdata
+#%  type: string
+#%  multiple: no
+#%  description: npy file with the optimized training data set, default: Xbt.npy
+#%  answer: Xbt.npy
+#%  required: no
+#%end
+#%option
+#%  key: npy_btclasses
+#%  type: string
+#%  multiple: no
+#%  description: npy file with the classes of the optimized training data set, default: Ybt.npy
+#%  answer: Ybt.npy
+#%  required: no
+#%end
+#%option
+#%  key: scalar
+#%  type: string
+#%  multiple: yes
+#%  description: Scaler options: with_mean centers the data, with_std scales them to unit variance; leave empty to skip the scaling
+#%  required: no
+#%  answer: with_mean,with_std
+#%end
+#%option
+#%  key: n_training
+#%  type: integer
+#%  multiple: no
+#%  description: Number of random training to training the machine learning
+#%  required: no
+#%end
+#%option
+#%  key: pyclassifiers
+#%  type: string
+#%  multiple: no
+#%  description: a python file with classifiers
+#%  required: no
+#%end
+#%option
+#%  key: pyvar
+#%  type: string
+#%  multiple: no
+#%  description: name of the python variable that must be a list of dictionary
+#%  required: no
+#%end
+#%option
+#%  key: pyindx
+#%  type: string
+#%  multiple: no
+#%  description: specify the index of the classifiers that you want to use
+#%  required: no
+#%end
+#%option
+#%  key: pyindx_optimize
+#%  type: string
+#%  multiple: no
+#%  description: Index of the classifiers to optimize the training set
+#%  required: no
+#%end
+#%option
+#%  key: nan
+#%  type: double
+#%  multiple: no
+#%  description: Value to use to substitute NaN
+#%  required: no
+#%end
+#%option
+#%  key: inf
+#%  type: double
+#%  multiple: no
+#%  description: Value to use to substitute Inf
+#%  required: no
+#%end
+#%option
+#%  key: csv
+#%  type: string
+#%  multiple: no
+#%  description: csv file name with the accuracy of the different machine learning classifiers
+#%  required: no
+#%end
+#%option
+#%  key: svc_c_range
+#%  type: double
+#%  multiple: yes
+#%  description: C value list
+#%  required: no
+#%  answer: 1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7,1e8
+#%end
+#%option
+#%  key: svc_gamma_range
+#%  type: double
+#%  multiple: yes
+#%  description: gamma value list
+#%  required: no
+#%  answer: 1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4
+#%end
+#%option
+#%  key: svc_kernel_range
+#%  type: string
+#%  multiple: yes
+#%  description: kernel value list
+#%  required: no
+#%  answer: linear,poly,rbf,sigmoid
+#%end
+#%option
+#%  key: svc_n_jobs
+#%  type: integer
+#%  multiple: no
+#%  description: number of jobs
+#%  required: no
+#%  answer: 1
+#%end
+#%option
+#%  key: svc_c
+#%  type: double
+#%  multiple: no
+#%  description: C value
+#%  required: no
+#%end
+#%option
+#%  key: svc_gamma
+#%  type: double
+#%  multiple: no
+#%  description: gamma value
+#%  required: no
+#%end
+#%option
+#%  key: svc_kernel
+#%  type: string
+#%  multiple: no
+#%  description: Available kernel are: ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’
+#%  required: no
+#%  answer: rbf
+#%end
+#%option
+#%  key: svc_img
+#%  type: string
+#%  multiple: no
+#%  description: filename with the image of the SVC parameters
+#%  required: no
+#%  answer: domain_%s.svg
+#%end
+#%option
+#%  key: rst_names
+#%  type: string
+#%  multiple: no
+#%  description: name pattern of the output raster maps, %s is replaced by the column name
+#%  required: no
+#%  answer: %s
+#%end
+#-----------------------------------------------------
+#%flag
+#%  key: e
+#%  description: Extract the training set from the vtraining map
+#%end
+#%flag
+#%  key: n
+#%  description: Export to numpy files
+#%end
+#%flag
+#%  key: b
+#%  description: Balance the training using the class with the minor number of areas
+#%end
+#%flag
+#%  key: o
+#%  description: optimize the training samples
+#%end
+#%flag
+#%  key: c
+#%  description: Classify the whole dataset
+#%end
+#%flag
+#%  key: r
+#%  description: Export the classification results to raster maps
+#%end
+#%flag
+#%  key: t
+#%  description: Test different classification methods
+#%end
+#%flag
+#%  key: d
+#%  description: Explore the SVC domain
+#%end
+#-----------------------------------------------------
+"""
+v.category input=seg005_64@pietro layer=1,2,3,4,5,6,7,8,9 type=point,line,centroid,area,face output=seg005_64_new option=transfer
+
+v.category input=seg005_64_new option=report
+
+i.pca -n input=Combabula_Nearmap.red@PERMANENT,Combabula_Nearmap.green@PERMANENT,Combabula_Nearmap.blue@PERMANENT output_prefix=pca
+PC1      2.78 ( 0.5757, 0.5957, 0.5601) [92.83%]
+PC2      0.20 ( 0.6002, 0.1572,-0.7842) [ 6.81%]
+PC3      0.01 ( 0.5552,-0.7877, 0.2670) [ 0.36%]
+
+time r.texture -a input=pca.1@pietro prefix=pca5_ size=5 --o
+time r.texture -a input=pca.1@pietro prefix=pca3_ size=3 --o
+echo finish
+"""
+from __future__ import (absolute_import, division, print_function,
+                        unicode_literals)
+import imp
+import sys
+import os
+
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+
+from grass.pygrass.functions import get_lib_path
+from grass.pygrass.messages import Messenger
+from grass.pygrass.vector import Vector
+from grass.pygrass.modules import Module
+from grass.script.core import parser, overwrite
+
+# directory with the python files installed together with the module
+path = get_lib_path("v.class.ml", "")
+if path is None:
+    # report the module whose directory is missing: formatting the message
+    # with ``path`` would always print "None" in this branch
+    raise ImportError("Not able to find the path %s directory."
+                      % "v.class.ml")
+
+# make the module's own python files importable
+sys.path.append(path)
+
+
+from training_extraction import extract_training
+from ml_classifiers import CLASSIFIERS
+from ml_functions import (balance, explorer_clsfiers, run_classifier,
+                          optimize_training, explore_SVC, plot_grid)
+from sqlite2npy import save2npy
+from npy2table import export_results
+
+
+def get_indexes(string, sep=',', rangesep='-'):
+    """Yield the integers described by a list of indexes and ranges.
+
+    The items are separated by ``sep``; an item containing ``rangesep`` is
+    expanded to all the integers between its two ends, both included.
+
+    >>> indx = '1-5,34-36,40'
+    >>> [i for i in get_indexes(indx)]
+    [1, 2, 3, 4, 5, 34, 35, 36, 40]
+    """
+    for token in string.split(sep):
+        if rangesep not in token:
+            yield int(token)
+            continue
+        first, last = token.split(rangesep)
+        for index in range(int(first), int(last) + 1):
+            yield index
+
+
+def get_colors(vtraining):
+    """Return a dictionary with the color of each cat of the training map.
+
+    :param vtraining: name of the training vector map, its table must have
+                      the columns ``cat`` and ``color``
+    :returns: dictionary built from the (cat, color) rows of the table
+    """
+    with Vector(vtraining, mode='r') as vct:
+        # query the attribute table by its own name rather than by the name
+        # of the vector map, the two are not guaranteed to coincide
+        cur = vct.table.execute('SELECT cat, color FROM %s;'
+                                % vct.table.name)
+        return dict(cur.fetchall())
+
+def main(opt, flg):
+    """Run the steps of the classification selected by options and flags.
+
+    Depending on the flags the function extracts the training set (e),
+    exports the tables to npy files (n), optimizes (o) or balances (b) the
+    training set, explores the SVC domain (d), tests the classifiers (t),
+    classifies the whole data set (c) and exports the results to raster
+    maps (r).
+
+    :param opt: dictionary with the options of the module
+    :param flg: dictionary with the flags of the module
+    """
+    msgr = Messenger()
+    indexes = None
+    vect = opt['vector']
+    vtraining = opt['vtraining'] if opt['vtraining'] else None
+    scaler = None
+    vlayer = opt['vlayer'] if opt['vlayer'] else vect + '_stats'
+    tlayer = opt['tlayer'] if opt['tlayer'] else vect + '_training'
+    rlayer = opt['rlayer'] if opt['rlayer'] else vect + '_results'
+
+    if opt['scalar']:
+        scapar = opt['scalar'].split(',')
+        scaler = StandardScaler(with_mean='with_mean' in scapar,
+                                with_std='with_std' in scapar)
+    # if training extract training
+    if vtraining and flg['e']:
+        msgr.message("Extract training from: <%s>." % vtraining)
+        extract_training(vect, vtraining, tlayer)
+        # the npy files must be refreshed after a new extraction
+        flg['n'] = True
+
+    if flg['n']:
+        msgr.message("Save arrays to npy files.")
+        save2npy(vect, vlayer, tlayer)
+
+    # define the classifiers to use/test; work on a copy of the list so that
+    # appending the SVC below does not modify the imported list
+    if opt['pyclassifiers'] and opt['pyvar']:
+        # import classifiers to use
+        mycls = imp.load_source("mycls", opt['pyclassifiers'])
+        classifiers = list(getattr(mycls, opt['pyvar']))
+    else:
+        classifiers = list(CLASSIFIERS)
+
+    # Append the SVC classifier
+    if opt['svc_c'] and opt['svc_gamma']:
+        svc = {'name': 'SVC', 'classifier': SVC,
+               'kwargs': {'C': float(opt['svc_c']),
+                          'gamma': float(opt['svc_gamma']),
+                          'kernel': opt['svc_kernel']}}
+        classifiers.append(svc)
+
+    # extract classifiers from pyindx
+    if opt['pyindx']:
+        indexes = [i for i in get_indexes(opt['pyindx'])]
+        classifiers = [classifiers[i] for i in indexes]
+
+    csv = open(opt['csv'], 'w') if opt['csv'] else sys.stdout
+    num = int(opt['n_training']) if opt['n_training'] else None
+
+    # load from npy files
+    Xt = np.load(opt['npy_tdata'])
+    Yt = np.load(opt['npy_tclasses'])
+    clsses = sorted(set(Yt))
+
+    # Substitute NaN
+    if opt['nan']:
+        msgr.message("Substitute NaN values with: <%g>" % float(opt['nan']))
+        Xt[np.isnan(Xt)] = float(opt['nan'])
+    if opt['inf']:
+        msgr.message("Substitute Inf values with: <%g>" % float(opt['inf']))
+        Xt[np.isinf(Xt)] = float(opt['inf'])
+
+    # optimize the training set
+    if flg['o']:
+        ind_optimize = (int(opt['pyindx_optimize']) if opt['pyindx_optimize']
+                        else 0)
+        cls = classifiers[ind_optimize]
+        msgr.message("Find the optimum training set.")
+        best, Xbt, Ybt = optimize_training(cls, Xt, Yt, scaler,
+                                           num=num, maxiterations=1000)
+        msg = "    - save the optimum training data set to: %s."
+        msgr.message(msg % opt['npy_btdata'])
+        np.save(opt['npy_btdata'], Xbt)
+        msg = "    - save the optimum training classes set to: %s."
+        msgr.message(msg % opt['npy_btclasses'])
+        np.save(opt['npy_btclasses'], Ybt)
+
+    # balance the data
+    if flg['b']:
+        if num is None:
+            # n_training is optional: "%d" % None would raise a TypeError
+            msgr.message("Balancing the training data set.")
+        else:
+            msg = ("Balancing the training data set, "
+                   "each class have <%d> samples.")
+            msgr.message(msg % num)
+        Xbt, Ybt = balance(Xt, Yt, num)
+    else:
+        if not flg['o']:
+            # reuse a training set saved by a previous run, if present
+            Xbt = (np.load(opt['npy_btdata'])
+                   if os.path.isfile(opt['npy_btdata']) else Xt)
+            Ybt = (np.load(opt['npy_btclasses'])
+                   if os.path.isfile(opt['npy_btclasses']) else Yt)
+
+    # scale the data
+    if scaler:
+        msgr.message("Scaling the training data set.")
+        scaler.fit(Xbt, Ybt)
+        Xt = scaler.transform(Xt)
+        Xbt = scaler.transform(Xbt)
+
+    if flg['d']:
+        C_range = [float(c) for c in opt['svc_c_range'].split(',')]
+        gamma_range = [float(g) for g in opt['svc_gamma_range'].split(',')]
+        kernel_range = [str(s) for s in opt['svc_kernel_range'].split(',')]
+        msgr.message("Exploring the SVC domain.")
+        grid = explore_SVC(Xbt, Ybt, n_folds=3, n_jobs=int(opt['svc_n_jobs']),
+                           C=C_range, gamma=gamma_range, kernel=kernel_range)
+        plot_grid(grid, save=opt['svc_img'])
+
+    # test the accuracy of different classifiers
+    if flg['t']:
+        # test different classifiers
+        msgr.message("Exploring different classifiers.")
+        explorer_clsfiers(classifiers, Xbt, Ybt, Xt, Yt, clsses, indexes, csv)
+
+    if flg['c']:
+        # classify
+        cols = []
+        data = np.load(opt['npy_data'])
+        if opt['nan']:
+            msg = "Substitute NaN values with: <%g>" % float(opt['nan'])
+            msgr.message(msg)
+            data[np.isnan(data)] = float(opt['nan'])
+        if opt['inf']:
+            msg = "Substitute Inf values with: <%g>" % float(opt['inf'])
+            msgr.message(msg)
+            data[np.isinf(data)] = float(opt['inf'])
+
+        msgr.message("Scaling the whole data set.")
+        data = scaler.transform(data) if scaler else data
+        cats = np.load(opt['npy_cats'])
+
+        for cls in classifiers:
+            run_classifier(cls, Xbt, Ybt, Xt, Yt, clsses, data, save=csv)
+            cols.append((cls['name'], 'INTEGER'))
+
+        msgr.message("Export the results to layer: <%s>" % str(rlayer))
+        export_results(vect, classifiers, cats, rlayer, vtraining, cols,
+                       overwrite(), pkl='res.pkl')
+
+    if flg['r']:
+        rules = ('\n'.join(['%d %s' % (k, v)
+                            for k, v in get_colors(vtraining).items()])
+                 if vtraining else None)
+
+        msgr.message("Export the layer with results to raster")
+        with Vector(vect, mode='r') as vct:
+            tab = vct.dblinks.by_name(rlayer).table()
+            rasters = [c for c in tab.columns]
+            rasters.remove(tab.key)
+
+        v2rst = Module('v.to.rast')
+        rclrs = Module('r.colors')
+        for rst in rasters:
+            # name of the raster map created for this column
+            rst_name = opt['rst_names'] % rst
+            v2rst(input=vect, layer=rlayer, type='area',
+                  use='attr', attrcolumn=rst, output=rst_name,
+                  rows=4096 * 4, overwrite=overwrite())
+            if rules:
+                # color the raster just created, not the column name
+                rclrs(map=rst_name, rules='-', stdin_=rules)
+
+    # close the report file if one has been opened
+    if csv is not sys.stdout:
+        csv.close()
+
+
+
+if __name__ == "__main__":
+    # the values returned by the parser are unpacked into main(opt, flg)
+    main(*parser())


Property changes on: grass-addons/grass7/vector/v.class.ml/v.class.ml.py
___________________________________________________________________
Added: svn:executable
   + *



More information about the grass-commit mailing list