[GRASS-SVN] r58638 - in grass-addons/grass7/vector: . v.class.ml
svn_grass at osgeo.org
Tue Jan 7 08:18:40 PST 2014
Author: zarch
Date: 2014-01-07 08:18:40 -0800 (Tue, 07 Jan 2014)
New Revision: 58638
Added:
grass-addons/grass7/vector/v.class.ml/
grass-addons/grass7/vector/v.class.ml/Makefile
grass-addons/grass7/vector/v.class.ml/ml_classifiers.py
grass-addons/grass7/vector/v.class.ml/ml_functions.py
grass-addons/grass7/vector/v.class.ml/npy2table.py
grass-addons/grass7/vector/v.class.ml/sqlite2npy.py
grass-addons/grass7/vector/v.class.ml/test_rpc.py
grass-addons/grass7/vector/v.class.ml/training_extraction.py
grass-addons/grass7/vector/v.class.ml/v.class.ml.html
grass-addons/grass7/vector/v.class.ml/v.class.ml.py
Log:
Add a new module for machine learning classification of vector maps
Added: grass-addons/grass7/vector/v.class.ml/Makefile
===================================================================
--- grass-addons/grass7/vector/v.class.ml/Makefile (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/Makefile 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,10 @@
+MODULE_TOPDIR = ../..
+
+PGM = v.class.ml
+
+ETCFILES = training_extraction ml_classifiers ml_functions sqlite2npy npy2table
+
+include $(MODULE_TOPDIR)/include/Make/Script.make
+include $(MODULE_TOPDIR)/include/Make/Python.make
+
+default: script
Added: grass-addons/grass7/vector/v.class.ml/ml_classifiers.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/ml_classifiers.py (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/ml_classifiers.py 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,393 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov 6 15:08:38 2013
+
+@author: pietro
+"""
+from __future__ import (absolute_import, division, print_function,
+ unicode_literals)
+from gettext import lgettext as _
+
+from sklearn.linear_model import SGDClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neighbors import (NearestNeighbors,
+ KNeighborsClassifier,
+ RadiusNeighborsClassifier,
+ NearestCentroid)
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.naive_bayes import GaussianNB
+
+
+from grass.pygrass.messages import Messenger
+
+MSGR = Messenger()
+
+try:
+ import mlpy
+except ImportError:
+ MSGR.warning(_("MLPY not found in the current python path"
+ "check that is installed or set the python path."
+ "Only `sklearn` will be used."))
+ mlpy = None
+
+
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+ ('class', 'INTEGER'),
+ ('color', 'VARCHAR(11)'), ]
+
+
+# Unsupervised
+#nbrs = NearestNeighbors(n_neighbors=8,
+# algorithm='ball_tree').fit(data)
+#distances, indices = nbrs.kneighbors(data)
+
+
+CLASSIFIERS = [
+ #
+ # Stochastic Gradient Descent (SGD)
+ #
+ {'name': 'sgd_hinge_l2', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "hinge", 'penalty': "l2"}},
+ {'name': 'sgd_huber_l2', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "modified_huber", 'penalty': "l2"}},
+ {'name': 'sgd_log_l2', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "log", 'penalty': "l2"}},
+ {'name': 'sgd_hinge_l1', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "hinge", 'penalty': "l1"}},
+ {'name': 'sgd_huber_l1', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "modified_huber", 'penalty': "l1"}},
+ {'name': 'sgd_log_l1', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "log", 'penalty': "l1"}},
+ {'name': 'sgd_hinge_elastic', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "hinge", 'penalty': "elasticnet"}},
+ {'name': 'sgd_huber_elastic', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "modified_huber", 'penalty': "elasticnet"}},
+ {'name': 'sgd_log_elastic', 'classifier': SGDClassifier,
+ 'kwargs': {'loss': "log", 'penalty': "elasticnet"}},
+ #
+ # K-NN
+ #
+ # uniform
+ {'name': 'knn2_uniform', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 2, 'weights': 'uniform'}},
+ {'name': 'knn4_uniform', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 4, 'weights': 'uniform'}},
+ {'name': 'knn8_uniform', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 8, 'weights': 'uniform'}},
+ {'name': 'knn16_uniform', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 16, 'weights': 'uniform'}},
+ # distance
+ {'name': 'knn2_distance', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 2, 'weights': 'distance'}},
+ {'name': 'knn4_distance', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 4, 'weights': 'distance'}},
+ {'name': 'knn8_distance', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 8, 'weights': 'distance'}},
+ {'name': 'knn16_distance', 'classifier': KNeighborsClassifier,
+ 'kwargs': {'n_neighbors': 16, 'weights': 'distance'}},
+ # radius
+ {'name': 'knn_radius_0p5_uniform',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 0.5, 'weights': 'uniform'}},
+ {'name': 'knn_radius_1_uniform',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 1., 'weights': 'uniform'}},
+ {'name': 'knn_radius_1p5_uniform',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 1.5, 'weights': 'uniform'}},
+ {'name': 'knn_radius_2_uniform',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 2., 'weights': 'uniform'}},
+ {'name': 'knn_radius_2p5_uniform',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 2.5, 'weights': 'uniform'}},
+ {'name': 'knn_radius_5_uniform',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 5., 'weights': 'uniform'}},
+
+ {'name': 'knn_radius_0p5_distance',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 0.5, 'weights': 'distance'}},
+ {'name': 'knn_radius_1_distance',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 1., 'weights': 'distance'}},
+ {'name': 'knn_radius_1p5_distance',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 1.5, 'weights': 'distance'}},
+ {'name': 'knn_radius_2_distance',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 2., 'weights': 'distance'}},
+ {'name': 'knn_radius_2p5_distance',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 2.5, 'weights': 'distance'}},
+ {'name': 'knn_radius_5_distance',
+ 'classifier': RadiusNeighborsClassifier,
+ 'kwargs': {'radius': 5., 'weights': 'distance'}},
+ # centroid
+ # 'euclidean', 'l2', 'l1', 'manhattan', 'cityblock'
+ # ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'cosine', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']
+ {'name': 'knn_centroid_euclidean_none', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'euclidean', 'shrink_threshold': None}},
+ {'name': 'knn_centroid_euclidean_0p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'euclidean', 'shrink_threshold': 0.5}},
+ {'name': 'knn_centroid_euclidean_1', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'euclidean', 'shrink_threshold': 1.0}},
+ {'name': 'knn_centroid_euclidean_1p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'euclidean', 'shrink_threshold': 1.5}},
+ {'name': 'knn_centroid_euclidean_2', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'euclidean', 'shrink_threshold': 2.0}},
+
+ {'name': 'knn_centroid_l2_none', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l2', 'shrink_threshold': None}},
+ {'name': 'knn_centroid_l2_0p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l2', 'shrink_threshold': 0.5}},
+ {'name': 'knn_centroid_l2_1', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l2', 'shrink_threshold': 1.0}},
+ {'name': 'knn_centroid_l2_1p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l2', 'shrink_threshold': 1.5}},
+ {'name': 'knn_centroid_l2_2', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l2', 'shrink_threshold': 2.0}},
+
+ {'name': 'knn_centroid_l1_none', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l1', 'shrink_threshold': None}},
+ {'name': 'knn_centroid_l1_0p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l1', 'shrink_threshold': 0.5}},
+ {'name': 'knn_centroid_l1_1', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l1', 'shrink_threshold': 1.0}},
+ {'name': 'knn_centroid_l1_1p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l1', 'shrink_threshold': 1.5}},
+ {'name': 'knn_centroid_l1_2', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'l1', 'shrink_threshold': 2.0}},
+
+ {'name': 'knn_centroid_manhattan_none', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'manhattan', 'shrink_threshold': None}},
+ {'name': 'knn_centroid_manhattan_0p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'manhattan', 'shrink_threshold': 0.5}},
+ {'name': 'knn_centroid_manhattan_1', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'manhattan', 'shrink_threshold': 1.0}},
+ {'name': 'knn_centroid_manhattan_1p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'manhattan', 'shrink_threshold': 1.5}},
+ {'name': 'knn_centroid_manhattan_2', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'manhattan', 'shrink_threshold': 2.0}},
+
+ {'name': 'knn_centroid_cityblock_none', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'cityblock', 'shrink_threshold': None}},
+ {'name': 'knn_centroid_cityblock_0p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'cityblock', 'shrink_threshold': 0.5}},
+ {'name': 'knn_centroid_cityblock_1', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'cityblock', 'shrink_threshold': 1.0}},
+ {'name': 'knn_centroid_cityblock_1p5', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'cityblock', 'shrink_threshold': 1.5}},
+ {'name': 'knn_centroid_cityblock_2', 'classifier': NearestCentroid,
+ 'kwargs': {'metric': 'cityblock', 'shrink_threshold': 2.0}},
+ #
+ # Tree
+ #
+ {'name': 'd_tree_gini', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'gini', 'splitter': 'best', 'max_depth': None,
+ 'min_samples_split': 2, 'min_samples_leaf': 1,
+ 'max_features': None, 'random_state': None,
+ 'min_density': None}},
+ {'name': 'd_tree_gini_sqrt', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 'sqrt'}},
+ {'name': 'd_tree_gini_log2', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 'log2'}},
+ {'name': 'd_tree_gini_0p25', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 0.25}},
+ {'name': 'd_tree_gini_0p50', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 0.5}},
+ {'name': 'd_tree_gini_0p75', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 0.75}},
+
+ {'name': 'd_tree_entropy', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'entropy', 'splitter': 'best', 'max_depth': None,
+ 'min_samples_split': 2, 'min_samples_leaf': 1,
+ 'max_features': None, 'random_state': None,
+ 'min_density': None}},
+ {'name': 'd_tree_entropy_sqrt', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 'sqrt'}},
+ {'name': 'd_tree_entropy_log2', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 'log2'}},
+ {'name': 'd_tree_entropy_0p25', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 0.25}},
+ {'name': 'd_tree_entropy_0p50', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 0.5}},
+ {'name': 'd_tree_entropy_0p75', 'classifier': DecisionTreeClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 0.75}},
+
+ {'name': 'rand_tree_gini', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_depth': None,
+ 'min_samples_split': 2, 'min_samples_leaf': 1,
+ 'max_features': None, 'random_state': None,
+ 'min_density': None}},
+ {'name': 'rand_tree_gini_sqrt', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 'sqrt'}},
+ {'name': 'rand_tree_gini_log2', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 'log2'}},
+ {'name': 'rand_tree_gini_0p25', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 0.25}},
+ {'name': 'rand_tree_gini_0p50', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 0.5}},
+ {'name': 'rand_tree_gini_0p75', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'gini', 'max_features': 0.75}},
+
+ {'name': 'rand_tree_entropy', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_depth': None,
+ 'min_samples_split': 2, 'min_samples_leaf': 1,
+ 'max_features': None, 'random_state': None,
+ 'min_density': None}},
+ {'name': 'rand_tree_entropy_sqrt', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 'sqrt'}},
+ {'name': 'rand_tree_entropy_log2', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 'log2'}},
+ {'name': 'rand_tree_entropy_0p25', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 0.25}},
+ {'name': 'rand_tree_entropy_0p50', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 0.5}},
+ {'name': 'rand_tree_entropy_0p75', 'classifier': RandomForestClassifier,
+ 'kwargs': {'criterion': 'entropy', 'max_features': 0.75}},
+
+ #
+# Gaussian
+ #
+ {'name': 'gaussianNB', 'classifier': GaussianNB},
+]
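+
+# A user-defined classifiers file (see the pyclassifiers and pyvar options
+# of v.class.ml.py) must follow the same structure: a list of dictionaries
+# with 'name', 'classifier' and, optionally, 'kwargs'. A minimal sketch,
+# file and variable names purely hypothetical:
+#
+#     # my_classifiers.py
+#     from sklearn.naive_bayes import GaussianNB
+#     MY_CLASSIFIERS = [{'name': 'my_gaussianNB', 'classifier': GaussianNB}, ]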
+
+
+class MLPYWrapper(object):
+ def __init__(self, cls):
+ self.cls = cls
+ self.mlcls = None
+ self.wrap = dict(fit='learn', predict='pred')
+
+ def __getattr__(self, name):
+ if self.mlcls and name in self.wrap.keys():
+ return getattr(self.mlcls, self.wrap[name])
+ raise AttributeError(name)
+
+ def __call__(self, *args, **kwargs):
+ self.mlcls = self.cls(*args, **kwargs)
+ return self
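+
+# A minimal usage sketch (assuming mlpy is importable; Xt, Yt, Xd are
+# hypothetical numpy arrays): the wrapper exposes the sklearn-like
+# fit/predict interface on top of mlpy's learn/pred, so the functions in
+# ml_functions.py can treat the two libraries uniformly:
+#
+#     ldac = MLPYWrapper(mlpy.LDAC)()   # __call__ instantiates mlpy.LDAC()
+#     ldac.fit(Xt, Yt)                  # dispatched to mlpy's learn()
+#     Yp = ldac.predict(Xd)             # dispatched to mlpy's pred()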
+
+
+if mlpy is not None:
+ MLPY_CLS = [
+ #
+ # Linear Discriminant Analysis Classifier (LDAC)
+ #
+ {'name': 'mlpy_LDAC_1', 'classifier': MLPYWrapper(mlpy.LDAC)},
+ #
+ # Elastic Net Classifier
+ #
+ {'name': 'mlpy_ElasticNetC_0.1_0.1',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.1, 'eps': 0.1, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.1_0.01',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.1, 'eps': 0.01, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.1_0.001',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.1, 'eps': 0.001, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.01_0.1',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.01, 'eps': 0.1, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.01_0.01',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.01, 'eps': 0.01, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.01_0.001',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.01, 'eps': 0.001, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.001_0.1',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.001, 'eps': 0.1, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.001_0.01',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.001, 'eps': 0.01, 'supp': True, 'tol': 0.01}},
+ {'name': 'mlpy_ElasticNetC_0.001_0.001',
+ 'classifier': MLPYWrapper(mlpy.ElasticNetC),
+ 'kwargs': {'lmb': 0.001, 'eps': 0.001, 'supp': True, 'tol': 0.01}},
+ #
+ # Diagonal Linear Discriminant Analysis (DLDA)
+ #
+ {'name': 'mlpy_DLDA_0.01', 'classifier': MLPYWrapper(mlpy.DLDA),
+ 'kwargs': {'delta': 0.01}},
+ {'name': 'mlpy_DLDA_0.05', 'classifier': MLPYWrapper(mlpy.DLDA),
+ 'kwargs': {'delta': 0.05}},
+ {'name': 'mlpy_DLDA_0.1', 'classifier': MLPYWrapper(mlpy.DLDA),
+ 'kwargs': {'delta': 0.1}},
+ {'name': 'mlpy_DLDA_0.5', 'classifier': MLPYWrapper(mlpy.DLDA),
+ 'kwargs': {'delta': 0.5}},
+ #
+ # mlpy.Golub
+ #
+ {'name': 'mlpy_Golub', 'classifier': MLPYWrapper(mlpy.Golub)},
+ #
+ # LibLinear
+ #
+ {'name': 'mlpy_liblin_l2r_lr',
+ 'classifier': MLPYWrapper(mlpy.LibLinear),
+ 'kwargs': {'solver_type': 'l2r_lr', 'C': 1, 'eps': 0.01}},
+ {'name': 'mlpy_liblin_l2r_l2loss_svc',
+ 'classifier': MLPYWrapper(mlpy.LibLinear),
+ 'kwargs': {'solver_type': 'l2r_l2loss_svc', 'C': 1, 'eps': 0.01}},
+ {'name': 'mlpy_liblin_l2r_l1loss_svc_dual',
+ 'classifier': MLPYWrapper(mlpy.LibLinear),
+ 'kwargs': {'solver_type': 'l2r_l1loss_svc_dual',
+ 'C': 1, 'eps': 0.01}},
+ {'name': 'mlpy_liblin_mcsvm_cs',
+ 'classifier': MLPYWrapper(mlpy.LibLinear),
+ 'kwargs': {'solver_type': 'mcsvm_cs', 'C': 1, 'eps': 0.01}},
+ {'name': 'mlpy_liblin_l1r_l2loss_svc',
+ 'classifier': MLPYWrapper(mlpy.LibLinear),
+ 'kwargs': {'solver_type': 'l1r_l2loss_svc', 'C': 1, 'eps': 0.01}},
+ {'name': 'mlpy_liblin_l1r_lr',
+ 'classifier': MLPYWrapper(mlpy.LibLinear),
+ 'kwargs': {'solver_type': 'l1r_lr', 'C': 1, 'eps': 0.01}},
+ {'name': 'mlpy_liblin_l2r_lr_dual',
+ 'classifier': MLPYWrapper(mlpy.LibLinear),
+ 'kwargs': {'solver_type': 'l2r_lr_dual', 'C': 1, 'eps': 0.01}},
+ #
+ # K-NN
+ #
+ {'name': 'mlpy_KNN_1', 'classifier': MLPYWrapper(mlpy.KNN),
+ 'kwargs': {'k': 1}},
+ {'name': 'mlpy_KNN_2', 'classifier': MLPYWrapper(mlpy.KNN),
+ 'kwargs': {'k': 2}},
+ {'name': 'mlpy_KNN_3', 'classifier': MLPYWrapper(mlpy.KNN),
+ 'kwargs': {'k': 3}},
+ {'name': 'mlpy_KNN_4', 'classifier': MLPYWrapper(mlpy.KNN),
+ 'kwargs': {'k': 4}},
+ {'name': 'mlpy_KNN_8', 'classifier': MLPYWrapper(mlpy.KNN),
+ 'kwargs': {'k': 8}},
+ {'name': 'mlpy_KNN_16', 'classifier': MLPYWrapper(mlpy.KNN),
+ 'kwargs': {'k': 16}},
+ #
+ # Tree
+ #
+ {'name': 'mlpy_tree_0_0', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 0, 'minsize': 0}},
+ {'name': 'mlpy_tree_0_5', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 0, 'minsize': 5}},
+ {'name': 'mlpy_tree_0_10', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 0, 'minsize': 10}},
+ {'name': 'mlpy_tree_0_20', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 0, 'minsize': 20}},
+ {'name': 'mlpy_tree_0_40', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 0, 'minsize': 40}},
+ {'name': 'mlpy_tree_1_0', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 1, 'minsize': 0}},
+ {'name': 'mlpy_tree_1_5', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 1, 'minsize': 5}},
+ {'name': 'mlpy_tree_1_10', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 1, 'minsize': 10}},
+ {'name': 'mlpy_tree_1_20', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 1, 'minsize': 20}},
+ {'name': 'mlpy_tree_1_40', 'classifier': MLPYWrapper(mlpy.ClassTree),
+ 'kwargs': {'stumps': 1, 'minsize': 40}},
+ #
+ # mlpy.MaximumLikelihoodC
+ #
+ #{'name': 'mlpy_maximumlike',
+ # 'classifier': MLPYWrapper(mlpy.MaximumLikelihoodC)},
+ ]
+ # add MLPY
+ CLASSIFIERS.extend(MLPY_CLS)
Added: grass-addons/grass7/vector/v.class.ml/ml_functions.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/ml_functions.py (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/ml_functions.py 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,201 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Nov 6 15:08:38 2013
+
+@author: pietro
+"""
+from __future__ import (absolute_import, division, print_function,
+ unicode_literals)
+import time
+import random as rnd
+from gettext import lgettext as _
+import sys
+
+import numpy as np
+import pylab as pl
+
+
+from sklearn.metrics import accuracy_score
+from sklearn.cross_validation import StratifiedKFold
+from sklearn.grid_search import GridSearchCV
+from sklearn.svm import SVC
+
+from grass.pygrass.messages import Messenger
+
+MSGR = Messenger()
+
+
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+ ('class', 'INTEGER'),
+ ('color', 'VARCHAR(11)'), ]
+
+
+def print_cols(clss, sep=';', save=sys.stdout):
+ clsses = sorted(set(clss))
+ cols = ['ml_index', 'ml_name', 'fit_time', 'prediction_time',
+ 'tot_accuracy']
+ cols += [str(cls) for cls in clsses]
+ cols += ['mean', ]
+ print(sep.join(cols), file=save)
+
+
+def print_test(cls, timefmt='%.4fs', accfmt='%.5f', sep=';', save=sys.stdout):
+ res = [str(cls['index']) if 'index' in cls else 'None',
+ cls['name'],
+ timefmt % (cls['fit_stop'] - cls['fit_start']),
+ timefmt % (cls['pred_stop'] - cls['pred_start']),
+ accfmt % cls['t_acc'],
+ sep.join([accfmt % acc for acc in cls['c_acc']]),
+ accfmt % cls['c_acc_mean']]
+ print(sep.join(res), file=save)
+
+
+def accuracy(sol, cls=None, data=None, clss=None, pred=None):
+ cls = cls if cls else dict()
+ clsses = clss if clss else sorted(set(sol))
+ if 'cls' in cls:
+ cls['pred_start'] = time.time()
+ pred = cls['cls'].predict(data)
+ cls['pred_stop'] = time.time()
+
+ cls['t_acc'] = accuracy_score(sol, pred, normalize=True)
+ c_acc = []
+ for c in clsses:
+ indx = sol == c
+ c_acc.append(accuracy_score(sol[indx], pred[indx],
+ normalize=True))
+ cls['c_acc'] = np.array(c_acc)
+ cls['c_acc_mean'] = cls['c_acc'].mean()
+ return cls
+
+
+def test_classifier(cls, Xt, Yt, Xd, Yd, clss, save=sys.stdout,
+ verbose=True):
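+ """Instantiate the classifier with its optional kwargs, fit it on
+ (Xt, Yt) and record the fit/prediction times and the total and
+ per-class accuracy on (Xd, Yd)."""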
+ cls['cls'] = cls['classifier'](**cls.get('kwargs', {}))
+ cls['fit_start'] = time.time()
+ cls['cls'].fit(Xt, Yt)
+ cls['fit_stop'] = time.time()
+ try:
+ cls['params'] = cls['cls'].get_params()
+ except AttributeError:
+ cls['params'] = None
+ accuracy(Yd, cls, Xd, clss)
+ if verbose:
+ print_test(cls, save=save)
+
+
+def run_classifier(cls, Xt, Yt, Xd, Yd, clss, data,
+ save=sys.stdout):
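+ """Like test_classifier, but also predict the whole data set, print
+ the accuracy summary and save the prediction to a '<name>.npy' file."""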
+ test_classifier(cls, Xt, Yt, Xd, Yd, clss, verbose=False)
+ cls['pred_start'] = time.time()
+ cls['predict'] = cls['cls'].predict(data)
+ cls['pred_stop'] = time.time()
+ print_test(cls, save=save)
+ np.save(cls['name'] + '.npy', cls['predict'])
+
+
+def reduce_cls(Yt, subs):
+ Yr = np.copy(Yt)
+ for k in subs:
+ indx = Yr == k
+ Yr[indx] = subs[k]
+ return Yr
+
+
+def balance_cls(data, num):
+ indx = np.random.randint(0, len(data), size=num)
+ return data[indx]
+
+
+def balance(tdata, tclss, num=None):
+ clss = sorted(set(tclss))
+ num = num if num else min([len(tclss[tclss == c]) for c in clss])
+ dt = []
+ for c in clss:
+ dt.extend([(c, d) for d in balance_cls(tdata[tclss == c], num)])
+ rnd.shuffle(dt)
+ bclss = np.array([r[0] for r in dt], dtype=int)
+ bdata = np.array([r[1] for r in dt])
+ return bdata, bclss
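+
+# Example (hypothetical values): with tclss = [1, 1, 1, 2, 2] and num=None
+# the smallest class counts 2 samples, so balance() draws 2 random samples
+# per class (with replacement, through np.random.randint) and returns the
+# shuffled data and classes.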
+
+
+def optimize_training(cls, tdata, tclss,
+ scaler=None, num=None, maxiterations=1000):
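+ """Iteratively draw random balanced subsets of the training data,
+ train `cls` on each of them, and return the subset (and fitted
+ classifier) that scores the best mean per-class accuracy on the whole
+ training set."""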
+ best = cls.copy()
+ best['c_acc_mean'] = 0
+ means = []
+ for i in range(maxiterations): # TODO: use multicore
+ MSGR.percent(i, maxiterations, 1)
+ Xt, Yt = balance(tdata, tclss, num)
+ if scaler:
+ scaler.fit(Xt, Yt)
+ sXt = scaler.transform(Xt)
+ stdata = scaler.transform(tdata)
+ else:
+ sXt, stdata = Xt, tdata
+ test_classifier(cls, sXt, Yt, stdata, tclss, None, verbose=False)
+ if cls['c_acc_mean'] > best['c_acc_mean']:
+ print("%f > %f" % (cls['c_acc_mean'], best['c_acc_mean']))
+ best = cls.copy()
+ bXt, bYt = Xt, Yt
+ means.append(cls['c_acc_mean'])
+ means = np.array(means)
+ print("best accuracy: %f, number of iterations: %d" % (best['c_acc_mean'],
+ maxiterations))
+ print("mean of means: %f" % means.mean())
+ print("min of means: %f" % means.min())
+ print("max of means: %f" % means.max())
+ print("std of means: %f" % means.std())
+ return best, bXt, bYt
+
+
+def explorer_clsfiers(clsses, Xt, Yt, Xd, Yd, clss,
+ indexes=None, csv=sys.stdout):
+ errors = []
+ gen = zip(indexes, clsses) if indexes else enumerate(clsses)
+ print_cols(Yt, sep=';', save=csv)
+ for ind, cls in gen:
+ print(cls['name'], ind)
+ cls['index'] = ind
+ try:
+ test_classifier(cls, Xt, Yt, Xd, Yd, clss, csv)
+ except Exception:
+ errors.append(cls)
+ for err in errors:
+ print('Error in: %s' % err['name'])
+
+
+def plot_grid(grid, save=''):
+ C = grid.param_grid['C']
+ gamma = grid.param_grid['gamma']
+
+ for kernel in grid.param_grid['kernel']:
+ scores = [x[1] for x in grid.grid_scores_ if x[0]['kernel'] == kernel]
+ scores = np.array(scores).reshape(len(C), len(gamma))
+ # draw heatmap of accuracy as a function of gamma and C
+ pl.figure(figsize=(8, 6))
+ pl.subplots_adjust(left=0.05, right=0.95, bottom=0.15, top=0.95)
+ pl.imshow(scores, interpolation='nearest', cmap=pl.cm.spectral)
+ pl.xlabel(r'$\gamma$')
+ pl.ylabel('C')
+ pl.colorbar()
+ pl.xticks(np.arange(len(gamma)), gamma, rotation=45)
+ pl.yticks(np.arange(len(C)), C)
+ ic, igamma = np.unravel_index(np.argmax(scores), scores.shape)
+ pl.plot(igamma, ic, 'r.')
+ best = scores[ic, igamma]
+ titl = r"$best:\, %0.4f, \,C:\, %g, \,\gamma: \,%g$" % (best,
+ C[ic],
+ gamma[igamma])
+ pl.title(titl)
+ if save:
+ pl.savefig(save, dpi=600, transparent=True, bbox_inches='tight')
+ pl.show()
+
+
+def explore_SVC(Xt, Yt, n_folds=3, n_jobs=1, **kwargs):
+ cv = StratifiedKFold(y=Yt, n_folds=n_folds)
+ grid = GridSearchCV(SVC(), param_grid=kwargs, cv=cv, n_jobs=n_jobs)
+ grid.fit(Xt, Yt)
+ print("The best classifier is: ", grid.best_estimator_)
+ return grid
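+
+
+# A minimal usage sketch (parameter values hypothetical, mirroring the
+# svc_*_range options of v.class.ml.py):
+#
+#     grid = explore_SVC(Xbt, Ybt, n_folds=3, n_jobs=4,
+#                        C=[0.1, 1., 10.], gamma=[1e-3, 1e-2],
+#                        kernel=['rbf', 'linear'])
+#     plot_grid(grid, save='domain.svg')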
Added: grass-addons/grass7/vector/v.class.ml/npy2table.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/npy2table.py (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/npy2table.py 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Nov 10 17:00:13 2013
+
+@author: pietro
+"""
+from __future__ import print_function, division
+
+import pickle
+import numpy as np
+from grass.pygrass.vector import Vector
+from grass.pygrass.vector.table import Link, Table
+
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+ ('class', 'INTEGER'),
+ ('color', 'VARCHAR(11)'), ]
+
+
+def export2sqlite(table, cats, clsses, training=None):
+ cur = table.conn.cursor()
+ if training:
+ # the dtype must reserve room for the color string (VARCHAR(11) in COLS)
+ colors = np.zeros(clsses.shape, dtype='S11')
+ cur = training.execute('SELECT cat, color FROM %s;' % training.name,
+ cursor=cur)
+ trndict = dict([c for c in cur.fetchall()])
+ for key in trndict:
+ colors[clsses == key] = trndict[key]
+ print("Insert data")
+ table.insert(zip(cats, clsses, colors) if training else zip(cats, clsses),
+ cursor=cur, many=True)
+ cur.close()
+ table.conn.commit()
+
+
+def export2onesqlite(table, cats, *clsses):
+ cur = table.conn.cursor()
+ print("Insert data")
+ table.insert(zip(cats, *clsses), cursor=cur, many=True)
+ cur.close()
+ table.conn.commit()
+
+
+def create_tab(vect, tab_name, cats, clsses, cols, training=None):
+ cur = vect.table.conn.cursor()
+ table = Table(tab_name, vect.table.conn)
+ add_link = True
+ if table.exist():
+ print("Table <%s> already exist, will be removed." % tab_name)
+ table.drop(cursor=cur)
+ add_link = False
+ print("Ceating a new table <%s>." % tab_name)
+ table.create(cols, cursor=cur)
+ export2sqlite(table, cats, clsses,
+ Table(training, vect.table.conn) if training else None)
+ cur.close()
+ if add_link:
+ vect.dblinks.add(Link(layer=len(vect.dblinks) + 1,
+ name=tab_name, table=tab_name))
+
+
+def export_results(vect_name, results, cats, rlayer,
+ training=None, cols=None, overwrite=False, pkl=None):
+ if pkl:
+ res = open(pkl, 'w')
+ pickle.dump(results, res)
+ res.close()
+
+ # check if the link already exist
+ with Vector(vect_name, mode='r') as vct:
+ link = vct.dblinks.by_name(rlayer)
+ mode = 'r' if link else 'rw'
+
+ print("Opening vector <%s>" % vect_name)
+ with Vector(vect_name, mode=mode) as vect:
+ if cols:
+ cols.insert(0, COLS[0])
+ tab = link.table() if link else Table(rlayer, vect.table.conn)
+ if tab.exist():
+ print("Table <%s> already exist, will be removed." % tab.name)
+ tab.drop(force=overwrite)
+ print("Ceating a new table <%s>." % rlayer)
+ import ipdb; ipdb.set_trace()
+ tab.create(cols)
+ export2onesqlite(tab, cats, *[cls['predict'] for cls in results])
+ if mode == 'rw':
+ nlyr = len(vect.dblinks) + 1
+ link = Link(nlyr, tab.name, tab.name)
+ vect.dblinks.add(link)
+ vect.build()
+ else:
+ for cls in results:
+ create_tab(vect, cls['name'], cats, cls['predict'],
+ training, COLS if training else COLS[:2])
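+
+# In v.class.ml.py (flag -c) this function is invoked as:
+#
+#     export_results(vect, classifiers, cats, rlayer, vtraining, cols,
+#                    overwrite(), pkl='res.pkl')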
+
+#create_tab(VECT, B1[:-4], cats, b1, TRAINING, COLS)
Added: grass-addons/grass7/vector/v.class.ml/sqlite2npy.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/sqlite2npy.py (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/sqlite2npy.py 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Nov 2 23:40:22 2013
+
+@author: pietro
+"""
+from __future__ import print_function, division
+
+import numpy as np
+from grass.pygrass.vector import VectorTopo
+
+FCATS = 'cats.npy'
+FDATA = 'data.npy'
+FINDX = 'indx.npy'
+FCLSS = 'training_classes.npy'
+FTDATA = 'training_data.npy'
+
+
+def cpdata(shape, iterator, msg=''):
+ """Avoid to create a python list and then convert the python list to a
+ numpy array. This function instantiate statically a numpy array and then
+ fill the numpy array with the data coming from the generator to reduce
+ the memory consumption."""
+ nrows = shape[0]
+ #msgr = ???
+ #msgr.message(msg)
+ print(msg)
+ dt = np.zeros(shape)
+ for i, data in enumerate(iterator):
+ #msgr.percent(i, nrows, 2)
+ dt[i] = data
+ return dt
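+
+# Example (hypothetical data): copy a generator of three rows into a
+# statically allocated (3, 2) array, without any intermediate list:
+#
+#     >>> cpdata((3, 2), iter([(0, 1), (2, 3), (4, 5)]), msg='copying')
+#     copying
+#     array([[ 0.,  1.],
+#            [ 2.,  3.],
+#            [ 4.,  5.]])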
+
+
+def save2npy(vect, l_data, l_trning,
+ fcats=FCATS, fdata=FDATA, findx=FINDX,
+ fclss=FCLSS, ftdata=FTDATA):
+ """Return 5 arrays:
+ - categories,
+ - data,
+ - a boolean array with the training,
+ - the training classes
+ - the training data
+ """
+ with VectorTopo(vect, mode='r') as vct:
+ # instantiate the tables
+ data = (vct.dblinks.by_layer(l_data).table() if l_data.isdigit()
+ else vct.dblinks.by_name(l_data).table())
+ trng = (vct.dblinks.by_layer(l_trning).table() if l_trning.isdigit()
+ else vct.dblinks.by_name(l_trning).table())
+
+ # check the dimensions
+ n_trng, n_data = trng.n_rows(), data.n_rows()
+ if n_trng != n_data:
+ msg = ('Different dimensions between the training set (%d)'
+ ' and the data set (%d)' % (n_trng, n_data))
+ raise ValueError(msg)
+
+ # extract the training
+ slct_trn = "SELECT class FROM {tname};".format(tname=trng.name)
+ trn_all = cpdata((n_data, ), (np.nan if a[0] is None else a[0]
+ for a in trng.execute(slct_trn)),
+ msg=slct_trn)
+ # trn_all = np.array([np.nan if a[0] is None else a[0]
+ # for a in trng.execute(slct_trn)])
+ trn_indxs = ~np.isnan(trn_all)
+
+ # extract the data
+ data_cols = data.columns.names()
+ data_cols.remove(data.key)
+ cols = ', '.join(data_cols)
+ slct_data = "SELECT {cols} FROM {tname};".format(cols=cols,
+ tname=data.name)
+ shape = (n_data, len(data_cols))
+ # use the function to be more memory efficient
+ dta = cpdata(shape, data.execute(slct_data), msg=slct_data)
+
+ # extract the cats
+ slct_cats = "SELECT {cat} FROM {tname};".format(cat=data.key,
+ tname=data.name)
+ cats = cpdata((n_data, ), (c[0] for c in data.execute(slct_cats)),
+ msg=slct_cats)
+ # cats = np.array([c[0] for c in data.execute(slct_cats)])
+
+ # training samples
+ trn_dta = dta[trn_indxs]
+ trn_ind = trn_all[trn_indxs]
+
+ # save
+ np.save(fcats, cats)
+ np.save(fdata, dta)
+ np.save(findx, trn_indxs)
+ np.save(fclss, trn_ind)
+ np.save(ftdata, trn_dta)
+ return cats, dta, trn_indxs, trn_ind, trn_dta
+
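+# Typical call (map and layer names hypothetical; the file names are the
+# module defaults defined above):
+#
+#     cats, data, indx, Yt, Xt = save2npy('segments', 'segments_stats',
+#                                         'segments_training')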
+
+def load_from_npy(fcats=FCATS, fdata=FDATA, findx=FINDX,
+ fclss=FCLSS, ftdata=FTDATA):
+ cats = np.load(fcats)
+ data = np.load(fdata)
+ indx = np.load(findx)
+ Yt = np.load(fclss)
+ Xt = np.load(ftdata)
+ return cats, data, indx, Yt, Xt
Added: grass-addons/grass7/vector/v.class.ml/test_rpc.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/test_rpc.py (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/test_rpc.py 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Nov 23 01:47:42 2013
+
+@author: pietro
+"""
+from grass.pygrass.vector import VectorTopo
+from grass.pygrass.vector.geometry import Point
+from grass.pygrass.functions import get_mapset_vector
+# assumption: RPCServer comes from pygrass' RPC interface
+from grass.pygrass.rpc import RPCServer
+
+
+def add_points(vname, vmapset='', *points):
+ """
+ >>> add_points('new', (1, 2), (2, 3), (3, 4))
+ """
+ mapset = get_mapset_vector(vname, vmapset)
+ mode = 'rw' if mapset else 'w'
+ with VectorTopo(vname, mapset, mode=mode) as vct:
+ for x, y in points:
+ vct.write(Point(x, y))
+
+
+ciface = RPCServer()
+check = ciface.call(function=add_points, args=('new', (1, 2), (2, 3), (3, 4)))
+
Added: grass-addons/grass7/vector/v.class.ml/training_extraction.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/training_extraction.py (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/training_extraction.py 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Nov 2 13:30:33 2013
+
+@author: pietro
+
+"""
+from __future__ import (absolute_import, division, print_function,
+ unicode_literals)
+from gettext import lgettext as _
+import numpy as np
+
+from grass.script.core import overwrite
+from grass.pygrass.vector import VectorTopo, Vector
+from grass.pygrass.vector.table import Link, Table
+from grass.pygrass.vector.geometry import Area, intersects
+from grass.pygrass.vector.basic import Bbox, BoxList
+from grass.pygrass.messages import Messenger
+
+
+COLS = [('cat', 'INTEGER PRIMARY KEY'),
+ ('class', 'INTEGER'), ]
+
+UPDATE = "UPDATE {tname} SET class=? WHERE {cat}=?;"
+
+
+def update_lines(line, alist, cur=None, sql=None):
+ """Update lines using only the boundary
+ """
+ to_up = []
+ bbox = Bbox()
+ for area in alist:
+ bbox = area.bbox(bbox)
+ if ((intersects(area.boundary, line)) or
+ (area.contain_pnt(line[0], bbox))):
+ to_up.append((line.cat, area.cat))
+ if (cur is not None) and (sql is not None):
+ cur.executemany(sql, to_up)
+ return to_up
+
+
+def update_areas(trn_area, seg_area, ids, cur=None, sql=None):
+ """Update the table with the areas that contained/are contained or
+ intersect the training areas.
+ """
+ to_up = []
+ bbox = trn_area.bbox()
+ for s_id in ids:
+ seg_area.id = s_id
+ seg_area.read()
+ if ((intersects(seg_area.boundary, trn_area.boundary)) or
+ (trn_area.contain_pnt(seg_area.boundary[0], bbox)) or
+ (seg_area.contain_pnt(trn_area.boundary[0]))):
+ to_up.append((trn_area.cat, seg_area.cat))
+ if (cur is not None) and (sql is not None):
+ cur.executemany(sql, to_up)
+ return to_up
+
+
+def find_lines(table, trn, seg, msgr):
+ """Update the lines' table using the boundaries of the training areas"""
+ sql = UPDATE.format(tname=table.name, cat=table.key)
+ boxlist = BoxList()
+ n_bounds = len(trn)
+ cur = table.conn.cursor()
+ for i, bound in enumerate(trn):
+ msgr.percent(i, n_bounds, 1)
+ alist = seg.find['by_box'].areas(bound.bbox(), boxlist)
+ update_lines(bound, alist, cur, sql)
+ table.conn.commit()
+
+
+def find_area(table, trn_ids, trn_area, seg_area, n_areas, seg, msgr):
+ """Update the lines' table using the training areas"""
+ cur = table.conn.cursor()
+ msgr.message(_("Finding areas..."))
+ sql = UPDATE.format(tname=table.name, cat=table.key)
+ boxlist = BoxList()
+ for i, trn_id in enumerate(trn_ids):
+ msgr.percent(i, n_areas, 1)
+ trn_area.id = trn_id
+ trn_area.read()
+ bblist = seg.find['by_box'].areas(trn_area.boundary.bbox(), boxlist,
+ bboxlist_only=True)
+ update_areas(trn_area, seg_area, bblist.ids, cur, sql)
+ table.conn.commit()
+
+
+def make_new_table(vct, msgr, tname, cols=COLS, force=overwrite()):
+ """Check/remove/create a new table"""
+ create_link = True
+ # make a new table
+ table = Table(tname, vct.table.conn)
+ if table.exist():
+ if any([table.name == l.table_name for l in vct.dblinks]):
+ create_link = False
+ msg = _("Table <%s> already exist and will be removed.")
+ msgr.warning(msg % table.name)
+ table.drop(force=force)
+ table.create(cols)
+ # fill the new table with the segment cats
+ slct = vct.table.filters.select(vct.table.key)
+ cur = vct.table.execute(slct.get_sql())
+ table.insert(((cat[0], None) for cat in cur), many=True)
+ table.conn.commit()
+ return table, create_link
+
+
+def check_balance(table, trntab, msgr):
+ """Checking the balance between different training classes."""
+ msg = _('Checking the balance between different training classes.')
+ msgr.message(msg)
+ chk_balance = ("SELECT class, count(*) as num_of_segments "
+ "FROM {tname} "
+ "GROUP BY class ORDER BY num_of_segments;")
+ res = table.execute(chk_balance.format(tname=table.name))
+ cl_sql = "SELECT cat, class FROM {tname} ORDER BY cat;"
+ clss = dict(trntab.execute(cl_sql.format(tname=trntab.name)))
+ for cls, num in res.fetchall():
+ clname = clss.get(cls, str(cls))
+ msgr.message(" - %s (%d): %d" % (clname if clname else repr(clname),
+ cls if cls else 0, num))
+
+
+def extract_training_array(table):
+ """Return a numpy array with the class id or nan if not define"""
+ cur = table.execute("SELECT class FROM {tname}".format(tname=table.name))
+ return np.array([np.isnan if c is None else c[0] for c in cur])
+
+
+def get_layer_num_name(vect, tlayer):
+ layer_num = len(vect.dblinks)+1
+ layer_name = vect.name + '_training'
+ if '/' in tlayer:
+ layer_num, layer_name = tlayer.split('/')
+ layer_num = int(layer_num)
+ elif tlayer.isdigit():
+ layer_num = int(tlayer)
+ elif tlayer:
+ layer_name = tlayer
+ return layer_num, layer_name
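+
+# Accepted tlayer formats (assuming vect.name == 'seg'):
+#     '2/mytraining' -> (2, 'mytraining')
+#     '3'            -> (3, 'seg_training')
+#     'mytraining'   -> (len(vect.dblinks) + 1, 'mytraining')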
+
+
+def extract_training(vect, tvect, tlayer):
+ """Assign a class to all the areas that contained, are contained
+ or intersect a training vector"""
+ msgr = Messenger()
+ with VectorTopo(tvect, mode='r') as trn:
+ with VectorTopo(vect, mode='r') as vct:
+ layer_num, layer_name = get_layer_num_name(vct, tlayer)
+ # instantiate the area objects
+ trn_area = Area(c_mapinfo=trn.c_mapinfo)
+ seg_area = Area(c_mapinfo=vct.c_mapinfo)
+ n_areas = trn.number_of('areas')
+ # check/remove/create a new table
+ table, create_link = make_new_table(vct, msgr, layer_name)
+ # find and save all the segments
+ find_area(table, trn.viter('areas', idonly=True),
+ trn_area, seg_area, n_areas, vct, msgr)
+ check_balance(table, trn.table, msgr)
+
+ if create_link:
+ msgr.message(_("Connect the new table to the vector map..."))
+ with Vector(vect, mode='rw') as seg:
+ link = Link(layer_num, name=layer_name, table=table.name)
+ seg.dblinks.add(link)
+ seg.build()
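+
+
+# Minimal usage sketch (map names hypothetical): assign the class of each
+# training area of 'train_areas' to the overlapping areas of the segmented
+# map 'seg005', writing them into a new 'seg005_training' layer/table:
+#
+#     extract_training('seg005', 'train_areas', 'seg005_training')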
Added: grass-addons/grass7/vector/v.class.ml/v.class.ml.html
===================================================================
Added: grass-addons/grass7/vector/v.class.ml/v.class.ml.py
===================================================================
--- grass-addons/grass7/vector/v.class.ml/v.class.ml.py (rev 0)
+++ grass-addons/grass7/vector/v.class.ml/v.class.ml.py 2014-01-07 16:18:40 UTC (rev 58638)
@@ -0,0 +1,499 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+############################################################################
+#
+# MODULE: v.class.ml
+#
+# AUTHOR(S): Pietro Zambelli (University of Trento)
+#
+# COPYRIGHT: (C) 2013 by the GRASS Development Team
+#
+# This program is free software under the GNU General Public
+# License (>=v2). Read the file COPYING that comes with GRASS
+# for details.
+#
+#############################################################################
+
+#%Module
+#% description: Classification of a vector map using machine learning algorithms
+#% keywords: machine learning
+#% keywords: classification
+#% overwrite: yes
+#%End
+#%option G_OPT_V_MAP
+#% key: vector
+#% description: Name of input vector map
+#% required: yes
+#%end
+#%option G_OPT_V_MAP
+#% key: vtraining
+#% description: Name of training vector map
+#% required: no
+#%end
+#%option
+#% key: vlayer
+#% type: string
+#% multiple: no
+#% description: layer name or number to use for the machine learning
+#% required: no
+#%end
+#%option
+#% key: tlayer
+#% type: string
+#% multiple: no
+#% description: layer number/name for the training layer
+#% required: no
+#%end
+#%option
+#% key: rlayer
+#% type: string
+#% multiple: no
+#% description: layer number/name for the ML results
+#% required: no
+#%end
+#%option
+#% key: npy_data
+#% type: string
+#% multiple: no
+#% description: Data with statistics in npy format.
+#% answer: data.npy
+#% required: no
+#%end
+#%option
+#% key: npy_cats
+#% type: string
+#% multiple: no
+#% description: Numpy array with vector cats.
+#% answer: cats.npy
+#% required: no
+#%end
+#%option
+#% key: npy_tdata
+#% type: string
+#% multiple: no
+#% description: npy file with the training data set
+#% answer: training_data.npy
+#% required: no
+#%end
+#%option
+#% key: npy_tclasses
+#% type: string
+#% multiple: no
+#% description: npy file with the training classes
+#% answer: training_classes.npy
+#% required: no
+#%end
+#%option
+#% key: npy_btdata
+#% type: string
+#% multiple: no
+#% description: npy file with the balanced or optimized training data set
+#% answer: Xbt.npy
+#% required: no
+#%end
+#%option
+#% key: npy_btclasses
+#% type: string
+#% multiple: no
+#% description: npy file with the balanced or optimized training classes
+#% answer: Ybt.npy
+#% required: no
+#%end
+#%option
+#% key: scalar
+#% type: string
+#% multiple: yes
+#% description: Scaler options: center (with_mean) and/or scale (with_std) the data; leave empty to skip scaling
+#% required: no
+#% answer: with_mean,with_std
+#%end
+#%option
+#% key: n_training
+#% type: integer
+#% multiple: no
+#% description: Number of random samples per class used to train the classifiers
+#% required: no
+#%end
+#%option
+#% key: pyclassifiers
+#% type: string
+#% multiple: no
+#% description: Python file with a list of classifiers
+#% required: no
+#%end
+#%option
+#% key: pyvar
+#% type: string
+#% multiple: no
+#% description: Name of the python variable that must be a list of dictionaries
+#% required: no
+#%end
+#%option
+#% key: pyindx
+#% type: string
+#% multiple: no
+#% description: Indexes of the classifiers to use (e.g.: 1-5,34-36,40)
+#% required: no
+#%end
+#%option
+#% key: pyindx_optimize
+#% type: string
+#% multiple: no
+#% description: Index of the classifier used to optimize the training set
+#% required: no
+#%end
+#%option
+#% key: nan
+#% type: double
+#% multiple: no
+#% description: Value to use to substitute NaN
+#% required: no
+#%end
+#%option
+#% key: inf
+#% type: double
+#% multiple: no
+#% description: Value to use to substitute Inf
+#% required: no
+#%end
+#%option
+#% key: csv
+#% type: string
+#% multiple: no
+#% description: CSV file name where the accuracy of the different classifiers is saved
+#% required: no
+#%end
+#%option
+#% key: svc_c_range
+#% type: double
+#% multiple: yes
+#% description: C value list
+#% required: no
+#% answer: 1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7,1e8
+#%end
+#%option
+#% key: svc_gamma_range
+#% type: double
+#% multiple: yes
+#% description: gamma value list
+#% required: no
+#% answer: 1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4
+#%end
+#%option
+#% key: svc_kernel_range
+#% type: string
+#% multiple: yes
+#% description: kernel value list
+#% required: no
+#% answer: linear,poly,rbf,sigmoid
+#%end
+#%option
+#% key: svc_n_jobs
+#% type: integer
+#% multiple: no
+#% description: number of jobs
+#% required: no
+#% answer: 1
+#%end
+#%option
+#% key: svc_c
+#% type: double
+#% multiple: no
+#% description: C value
+#% required: no
+#%end
+#%option
+#% key: svc_gamma
+#% type: double
+#% multiple: no
+#% description: gamma value
+#% required: no
+#%end
+#%option
+#% key: svc_kernel
+#% type: string
+#% multiple: no
+#% description: Available kernels are: linear, poly, rbf, sigmoid, precomputed
+#% required: no
+#% answer: rbf
+#%end
+#%option
+#% key: svc_img
+#% type: string
+#% multiple: no
+#% description: File name for the image of the explored SVC parameters domain
+#% required: no
+#% answer: domain_%s.svg
+#%end
+#%option
+#% key: rst_names
+#% type: string
+#% multiple: no
+#% description: Format string used to build the names of the output raster maps
+#% required: no
+#% answer: %s
+#%end
+#-----------------------------------------------------
+#%flag
+#% key: e
+#% description: Extract the training set from the vtraining map
+#%end
+#%flag
+#% key: n
+#% description: Export to numpy files
+#%end
+#%flag
+#% key: b
+#% description: Balance the training using the class with the smallest number of areas
+#%end
+#%flag
+#% key: o
+#% description: Optimize the training samples
+#%end
+#%flag
+#% key: c
+#% description: Classify the whole dataset
+#%end
+#%flag
+#% key: r
+#% description: Export the classification results to raster maps
+#%end
+#%flag
+#% key: t
+#% description: Test different classification methods
+#%end
+#%flag
+#% key: d
+#% description: Explore the SVC domain
+#%end
+#-----------------------------------------------------
+"""
+v.category input=seg005_64@pietro layer=1,2,3,4,5,6,7,8,9 type=point,line,centroid,area,face output=seg005_64_new option=transfer
+
+v.category input=seg005_64_new option=report
+
+i.pca -n input=Combabula_Nearmap.red@PERMANENT,Combabula_Nearmap.green@PERMANENT,Combabula_Nearmap.blue@PERMANENT output_prefix=pca
+PC1 2.78 ( 0.5757, 0.5957, 0.5601) [92.83%]
+PC2 0.20 ( 0.6002, 0.1572,-0.7842) [ 6.81%]
+PC3 0.01 ( 0.5552,-0.7877, 0.2670) [ 0.36%]
+
+time r.texture -a input=pca.1@pietro prefix=pca5_ size=5 --o
+time r.texture -a input=pca.1@pietro prefix=pca3_ size=3 --o
+echo finish
+"""
+from __future__ import (absolute_import, division, print_function,
+ unicode_literals)
+import imp
+import sys
+import os
+
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+
+from grass.pygrass.functions import get_lib_path
+from grass.pygrass.messages import Messenger
+from grass.pygrass.vector import Vector
+from grass.pygrass.modules import Module
+from grass.script.core import parser, overwrite
+
+path = get_lib_path("v.class.ml", "")
+if path is None:
+ raise ImportError("Not able to find the path %s directory." % path)
+
+sys.path.append(path)
+
+
+from training_extraction import extract_training
+from ml_classifiers import CLASSIFIERS
+from ml_functions import (balance, explorer_clsfiers, run_classifier,
+ optimize_training, explore_SVC, plot_grid)
+from sqlite2npy import save2npy
+from npy2table import export_results
+
+
+def get_indexes(string, sep=',', rangesep='-'):
+ """
+ >>> indx = '1-5,34-36,40'
+ >>> [i for i in get_indexes(indx)]
+ [1, 2, 3, 4, 5, 34, 35, 36, 40]
+ """
+ for ind in string.split(sep):
+ if rangesep in ind:
+ start, stop = ind.split(rangesep)
+ for i in range(int(start), int(stop) + 1):
+ yield i
+ else:
+ yield int(ind)
+
+
+def get_colors(vtraining):
+ with Vector(vtraining, mode='r') as vct:
+ cur = vct.table.execute('SELECT cat, color FROM %s;' % vct.name)
+ return dict([c for c in cur.fetchall()])
+
+def main(opt, flg):
+ msgr = Messenger()
+ indexes = None
+ vect = opt['vector']
+ vtraining = opt['vtraining'] if opt['vtraining'] else None
+ scaler = None
+ vlayer = opt['vlayer'] if opt['vlayer'] else vect + '_stats'
+ tlayer = opt['tlayer'] if opt['tlayer'] else vect + '_training'
+ rlayer = opt['rlayer'] if opt['rlayer'] else vect + '_results'
+
+ if opt['scalar']:
+ scapar = opt['scalar'].split(',')
+ scaler = StandardScaler(with_mean='with_mean' in scapar,
+ with_std='with_std' in scapar)
+ # if training extract training
+ if vtraining and flg['e']:
+ msgr.message("Extract training from: <%s>." % vtraining)
+ extract_training(vect, vtraining, tlayer)
+ flg['n'] = True
+
+ if flg['n']:
+ msgr.message("Save arrays to npy files.")
+ save2npy(vect, vlayer, tlayer)
+
+ # define the classifiers to use/test
+ if opt['pyclassifiers'] and opt['pyvar']:
+ # import classifiers to use
+ mycls = imp.load_source("mycls", opt['pyclassifiers'])
+ classifiers = getattr(mycls, opt['pyvar'])
+ else:
+ classifiers = CLASSIFIERS
+
+ # Append the SVC classifier
+ if opt['svc_c'] and opt['svc_gamma']:
+ svc = {'name': 'SVC', 'classifier': SVC,
+ 'kwargs': {'C': float(opt['svc_c']),
+ 'gamma': float(opt['svc_gamma']),
+ 'kernel': opt['svc_kernel']}}
+ classifiers.append(svc)
+
+ # extract classifiers from pyindx
+ if opt['pyindx']:
+ indexes = [i for i in get_indexes(opt['pyindx'])]
+ classifiers = [classifiers[i] for i in indexes]
+
+ csv = open(opt['csv'], 'w') if opt['csv'] else sys.stdout
+ num = int(opt['n_training']) if opt['n_training'] else None
+
+ # load from npy files
+ Xt = np.load(opt['npy_tdata'])
+ Yt = np.load(opt['npy_tclasses'])
+ clsses = sorted(set(Yt))
+
+ # Substitute NaN
+ if opt['nan']:
+ msgr.message("Substitute NaN values with: <%g>" % float(opt['nan']))
+ Xt[np.isnan(Xt)] = float(opt['nan'])
+ if opt['inf']:
+ msgr.message("Substitute Inf values with: <%g>" % float(opt['inf']))
+ Xt[np.isinf(Xt)] = float(opt['inf'])
+
+ # optimize the training set
+ if flg['o']:
+ ind_optimize = (int(opt['pyindx_optimize']) if opt['pyindx_optimize']
+ else 0)
+ cls = classifiers[ind_optimize]
+ msgr.message("Find the optimum training set.")
+ best, Xbt, Ybt = optimize_training(cls, Xt, Yt, scaler,
+ num=num, maxiterations=1000)
+ msg = " - save the optimum training data set to: %s."
+ msgr.message(msg % opt['npy_btdata'])
+ np.save(opt['npy_btdata'], Xbt)
+ msg = " - save the optimum training classes set to: %s."
+ msgr.message(msg % opt['npy_btclasses'])
+ np.save(opt['npy_btclasses'], Ybt)
+
+ # balance the data
+ if flg['b']:
+ msg = "Balancing the training data set, each class have <%d> samples."
+ msgr.message(msg % num)
+ Xbt, Ybt = balance(Xt, Yt, num)
+ else:
+ if not flg['o']:
+ Xbt = (np.load(opt['npy_btdata'])
+ if os.path.isfile(opt['npy_btdata']) else Xt)
+ Ybt = (np.load(opt['npy_btclasses'])
+ if os.path.isfile(opt['npy_btclasses']) else Yt)
+
+ # scale the data
+ if scaler:
+ msgr.message("Scaling the training data set.")
+ scaler.fit(Xbt, Ybt)
+ Xt = scaler.transform(Xt)
+ Xbt = scaler.transform(Xbt)
+
+ if flg['d']:
+ C_range = [float(c) for c in opt['svc_c_range'].split(',')]
+ gamma_range = [float(g) for g in opt['svc_gamma_range'].split(',')]
+ kernel_range = [str(s) for s in opt['svc_kernel_range'].split(',')]
+ msgr.message("Exploring the SVC domain.")
+ grid = explore_SVC(Xbt, Ybt, n_folds=3, n_jobs=int(opt['svc_n_jobs']),
+ C=C_range, gamma=gamma_range, kernel=kernel_range)
+ plot_grid(grid, save=opt['svc_img'])
+
+ # test the accuracy of different classifiers
+ if flg['t']:
+ # test different classifiers
+ msgr.message("Exploring different classifiers.")
+ explorer_clsfiers(classifiers, Xbt, Ybt, Xt, Yt, clsses, indexes, csv)
+
+ if flg['c']:
+ # classify
+ cols = []
+ data = np.load(opt['npy_data'])
+ if opt['nan']:
+ msg = "Substitute NaN values with: <%g>" % float(opt['nan'])
+ msgr.message(msg)
+ data[np.isnan(data)] = float(opt['nan'])
+ if opt['inf']:
+ msg = "Substitute Inf values with: <%g>" % float(opt['inf'])
+ msgr.message(msg)
+ data[np.isinf(data)] = float(opt['inf'])
+
+ msgr.message("Scaling the whole data set.")
+ data = scaler.transform(data) if scaler else data
+ cats = np.load(opt['npy_cats'])
+
+ for cls in classifiers:
+ run_classifier(cls, Xbt, Ybt, Xt, Yt, clsses, data, save=csv)
+ cols.append((cls['name'], 'INTEGER'))
+
+# import pickle
+# res = open('res.pkl', 'r')
+# classifiers = pickle.load(res)
+ msgr.message("Export the results to layer: <%s>" % str(rlayer))
+ export_results(vect, classifiers, cats, rlayer, vtraining, cols,
+ overwrite(), pkl='res.pkl')
+# res.close()
+
+ if flg['r']:
+ rules = ('\n'.join(['%d %s' % (k, v)
+ for k, v in get_colors(vtraining).items()])
+ if vtraining else None)
+
+ msgr.message("Export the layer with results to raster")
+ with Vector(vect, mode='r') as vct:
+ tab = vct.dblinks.by_name(rlayer).table()
+ rasters = [c for c in tab.columns]
+ rasters.remove(tab.key)
+
+
+ v2rst = Module('v.to.rast')
+ rclrs = Module('r.colors')
+ for rst in rasters:
+ v2rst(input=vect, layer=rlayer, type='area',
+ use='attr', attrcolumn=rst, output=opt['rst_names'] % rst,
+ rows=4096 * 4, overwrite=overwrite())
+ if rules:
+ rclrs(map=rst, rules='-', stdin_=rules)
+
+
+
+if __name__ == "__main__":
+ main(*parser())
Property changes on: grass-addons/grass7/vector/v.class.ml/v.class.ml.py
___________________________________________________________________
Added: svn:executable
+ *