[GRASS-SVN] r73967 - in grass-addons/grass7/raster: r.object.activelearning r.object.spatialautocor r.to.vect.tiled
svn_grass at osgeo.org
Thu Jan 17 12:53:34 PST 2019
Author: neteler
Date: 2019-01-17 12:53:33 -0800 (Thu, 17 Jan 2019)
New Revision: 73967
Modified:
grass-addons/grass7/raster/r.object.activelearning/r.object.activelearning.py
grass-addons/grass7/raster/r.object.spatialautocor/r.object.spatialautocor.py
grass-addons/grass7/raster/r.to.vect.tiled/r.to.vect.tiled.py
Log:
r.object.activelearning, r.object.spatialautocor, r.to.vect.tiled addons: converted tab to space indentation using pycharm tool; tabs are to be avoided, see https://trac.osgeo.org/grass/wiki/Submitting/Python?version=19#Editorsettingsfor4-spaceindentation
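For anyone reproducing this cleanup outside PyCharm, a minimal sketch of the same tab-to-space conversion (the script and the 4-space tab size are assumptions for illustration, not part of the commit):

import sys

def retab(path, tabsize=4):
    # expandtabs() converts every tab; equivalent to reindenting
    # for files whose only tabs are indentation
    with open(path) as f:
        lines = f.readlines()
    with open(path, 'w') as f:
        for line in lines:
            f.write(line.expandtabs(tabsize))

if __name__ == '__main__':
    for path in sys.argv[1:]:
        retab(path)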
Modified: grass-addons/grass7/raster/r.object.activelearning/r.object.activelearning.py
===================================================================
--- grass-addons/grass7/raster/r.object.activelearning/r.object.activelearning.py 2019-01-17 20:42:28 UTC (rev 73966)
+++ grass-addons/grass7/raster/r.object.activelearning/r.object.activelearning.py 2019-01-17 20:53:33 UTC (rev 73967)
@@ -96,13 +96,13 @@
try : # You can run the tests outside of grass where those imports are not available
- import grass as grass
- import grass.script as gcore
+ import grass as grass
+ import grass.script as gcore
except ImportError :
- pass
+ pass
-import numpy as np
-import scipy
+import numpy as np
+import scipy
import os.path
import sys
@@ -112,469 +112,469 @@
def load_data(file_path, labeled=False, skip_header=1, scale=True) :
- """
- Load the data from a csv file
+ """
+ Load the data from a csv file
- :param file_path: Path to the csv data file
- :param labeled: True if the data is labeled (default=False)
- :param skip_header: Header size (in line) (default=1)
- :param scale: True if the data should be normalize (default=True)
+ :param file_path: Path to the csv data file
+ :param labeled: True if the data is labeled (default=False)
+ :param skip_header: Header size (in lines) (default=1)
+ :param scale: True if the data should be normalized (default=True)
- :type file_path: string
- :type labeled: boolean
- :type skip_header: int
- :type scale: boolean
+ :type file_path: string
+ :type labeled: boolean
+ :type skip_header: int
+ :type scale: boolean
- :return: Return 4 arrays, the features X, the IDs, the labels y and the header
- :rtype: ndarray
- """
- data = np.genfromtxt(file_path, delimiter=',', skip_header=0, dtype=None)
-
+ :return: Return 4 arrays, the features X, the IDs, the labels y and the header
+ :rtype: ndarray
+ """
+ data = np.genfromtxt(file_path, delimiter=',', skip_header=0, dtype=None)
- header = np.array([])
- if skip_header != 0 :
- header = data[0:skip_header,:]
- data = data[skip_header:, :] #Remove header
- data = data.astype(np.float)
+ header = np.array([])
- ID = data[:,0] #get only row 0s
- if labeled :
- y = data[:,1] #get only row 1
- X = data[:,2:] #remove ID and label
- else :
- y = []
- X = data[:,1:] #remove ID
+ if skip_header != 0 :
+ header = data[0:skip_header,:]
+ data = data[skip_header:, :] #Remove header
+ data = data.astype(np.float)
- if scale :
- X = preprocessing.scale(X)
+ ID = data[:,0] #get only row 0s
+ if labeled :
+ y = data[:,1] #get only row 1
+ X = data[:,2:] #remove ID and label
+ else :
+ y = []
+ X = data[:,1:] #remove ID
- return X, ID, y, header
+ if scale :
+ X = preprocessing.scale(X)
+ return X, ID, y, header
+
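As a usage note rather than part of the diff: load_data() expects a CSV whose first column is the object ID and, for labeled files, whose second column is the class. A self-contained sketch with a made-up file (filename and values are illustrative):

import numpy as np

with open('training.csv', 'w') as f:
    f.write('ID,Class,f1,f2\n')
    f.write('1,0,0.5,1.2\n')
    f.write('2,1,0.7,0.9\n')
    f.write('3,0,0.4,1.5\n')

data = np.genfromtxt('training.csv', delimiter=',', skip_header=1)
ids, y, X = data[:, 0], data[:, 1], data[:, 2:]
print(ids, y, X.shape)  # [1. 2. 3.] [0. 1. 0.] (3, 2)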
def write_result_file(ID, X_unlabeled, predictions, header, filename) :
- """
- Write all samples with their ID and their class prediction in csv file. Also add the header to this csv file.
+ """
+ Write all samples with their ID and their class prediction in csv file. Also add the header to this csv file.
- :param ID: Samples'IDs
- :X_unlabeled: Samples'features
- :predictions: Class predictin for each sample
- :header: Header of the csv file
- :filename: Name of the csv file
- """
- data = np.copy(X_unlabeled)
- data = np.insert(data, 0, map(str, ID), axis=1)
- data = np.insert(data, 1, map(str, predictions), axis=1)
+ :param ID: Samples' IDs
+ :param X_unlabeled: Samples' features
+ :param predictions: Class prediction for each sample
+ :param header: Header of the csv file
+ :param filename: Name of the csv file
+ """
+ data = np.copy(X_unlabeled)
+ data = np.insert(data, 0, map(str, ID), axis=1)
+ data = np.insert(data, 1, map(str, predictions), axis=1)
- if header.size != 0 :
- header = np.insert(header, 1, ['Class'])
- data = np.insert(data.astype(str), 0, header , axis=0)
- np.savetxt(filename, data, delimiter=",",fmt="%s")
- return True
+ if header.size != 0 :
+ header = np.insert(header, 1, ['Class'])
+ data = np.insert(data.astype(str), 0, header , axis=0)
+ np.savetxt(filename, data, delimiter=",",fmt="%s")
+ return True
def update(update_file, X_train, ID_train, y_train, X_unlabeled, ID_unlabeled) :
- """
- Transfer features and labels from the unlabeled arrays to the training arrays based on the update file.
+ """
+ Transfer features and labels from the unlabeled arrays to the training arrays based on the update file.
- :param update_file: Path to the update file
- :param X_train: Features for the training samples
- :param ID_train: IDs of the training samples
- :param y_train: Labels of the training samples
- :param X_unlabeled: Features for the training samples
- :param ID_unlabeled: IDs of the unlabeled samples
- """
- update = np.genfromtxt(update_file, delimiter=',', skip_header=1)
- if update.size == 0 :
- return X_train, ID_train, y_train
- elif update.ndim == 1 :
- update = [update]
- for index_update, row in enumerate(update) :
- index = np.where(ID_unlabeled == row[0]) # Find in 'unlabeled' the line corresping to the ID
- if index[0].size != 0 : # Check if row exists
- features = X_unlabeled[index[0][0]] # Features
- ID = ID_unlabeled[index[0][0]]
- label = row[1]
- X_train = np.append(X_train, [features], axis=0)
- ID_train = np.append(ID_train, [ID], axis=0)
- y_train = np.append(y_train, [label], axis=0)
- else :
- gcore.warning("The following sample could not be found :{}".format(row[0]))
+ :param update_file: Path to the update file
+ :param X_train: Features for the training samples
+ :param ID_train: IDs of the training samples
+ :param y_train: Labels of the training samples
+ :param X_unlabeled: Features of the unlabeled samples
+ :param ID_unlabeled: IDs of the unlabeled samples
+ """
+ update = np.genfromtxt(update_file, delimiter=',', skip_header=1)
+ if update.size == 0 :
+ return X_train, ID_train, y_train
+ elif update.ndim == 1 :
+ update = [update]
+ for index_update, row in enumerate(update) :
+ index = np.where(ID_unlabeled == row[0]) # Find in 'unlabeled' the line corresponding to the ID
+ if index[0].size != 0 : # Check if row exists
+ features = X_unlabeled[index[0][0]] # Features
+ ID = ID_unlabeled[index[0][0]]
+ label = row[1]
+ X_train = np.append(X_train, [features], axis=0)
+ ID_train = np.append(ID_train, [ID], axis=0)
+ y_train = np.append(y_train, [label], axis=0)
+ else :
+ gcore.warning("The following sample could not be found :{}".format(row[0]))
- return X_train, ID_train, y_train
+ return X_train, ID_train, y_train
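For reference, the update file read here is a two-column CSV (sample ID, assigned class) with one header line; an illustrative file that update() would accept:

ID,Class
42,3
57,1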
def write_update(update_file, training_file, unlabeled_file, new_training_filename, new_unlabeled_filename) :
- """
- Transfer samples from the unlabeled set to the training set based on an update file
- with IDs of samples to transfer and their classes.
+ """
+ Transfer samples from the unlabeled set to the training set based on an update file
+ with IDs of samples to transfer and their classes.
- :param update_file: Path to the update file
- :param training_file: Path to the training file
- :param unlabeled_file: Path to the unlabeled file
- :param new_training_filename: Path to the new training file that will be created
- :param new_unlabeled_filename: Path to the new unlabeled file that will be created
+ :param update_file: Path to the update file
+ :param training_file: Path to the training file
+ :param unlabeled_file: Path to the unlabeled file
+ :param new_training_filename: Path to the new training file that will be created
+ :param new_unlabeled_filename: Path to the new unlabeled file that will be created
- :type update_file: string
- :type training_file: string
- :type unlabeled_file: string
- :type new_training_filename: string
- :type new_unlabeled_filename: string
- """
- update = np.genfromtxt(update_file, delimiter=',', skip_header=1)
- training = np.genfromtxt(training_file, delimiter=',', skip_header=0, dtype=None)
- unlabeled = np.genfromtxt(unlabeled_file, delimiter=',', skip_header=0, dtype=None)
- successful_updates = []
+ :type update_file: string
+ :type training_file: string
+ :type unlabeled_file: string
+ :type new_training_filename: string
+ :type new_unlabeled_filename: string
+ """
+ update = np.genfromtxt(update_file, delimiter=',', skip_header=1)
+ training = np.genfromtxt(training_file, delimiter=',', skip_header=0, dtype=None)
+ unlabeled = np.genfromtxt(unlabeled_file, delimiter=',', skip_header=0, dtype=None)
+ successful_updates = []
- if update.size == 0 :
- return
- elif update.ndim == 1 :
- update = [update]
+ if update.size == 0 :
+ return
+ elif update.ndim == 1 :
+ update = [update]
- for index_update, row in enumerate(update) :
- index = np.where(unlabeled == str(row[0])) # Find in 'unlabeled' the line corresping to the ID
- if index[0].size != 0 : # Check if row exists
- data = unlabeled[index[0][0]][1:] # Features
- data = np.insert(data, 0, row[0], axis=0) # ID
- data = np.insert(data, 1, row[1], axis=0) # Class
- training = np.append(training, [data], axis=0)
- unlabeled = np.delete(unlabeled, index[0][0], axis=0)
- successful_updates.append(index_update)
- else :
- gcore.warning("Unable to update completely: the following sample could not be found in the unlabeled set:{}".format(row[0]))
+ for index_update, row in enumerate(update) :
+ index = np.where(unlabeled == str(row[0])) # Find in 'unlabeled' the line corresponding to the ID
+ if index[0].size != 0 : # Check if row exists
+ data = unlabeled[index[0][0]][1:] # Features
+ data = np.insert(data, 0, row[0], axis=0) # ID
+ data = np.insert(data, 1, row[1], axis=0) # Class
+ training = np.append(training, [data], axis=0)
+ unlabeled = np.delete(unlabeled, index[0][0], axis=0)
+ successful_updates.append(index_update)
+ else :
+ gcore.warning("Unable to update completely: the following sample could not be found in the unlabeled set:{}".format(row[0]))
- with open(update_file) as f:
- header = f.readline()
- header = header.split(',')
-
- update = np.delete(update, successful_updates, axis=0)
- update = np.insert(update.astype(str), 0, header, axis=0)
+ with open(update_file) as f:
+ header = f.readline()
+ header = header.split(',')
- # Save files
- if new_training_filename != '' :
- write_updated_file(new_training_filename, training)
- gcore.message("New training file written to {}".format(new_training_filename))
- if new_unlabeled_filename != '':
- write_updated_file(new_unlabeled_filename, unlabeled)
- gcore.message("New unlabeled file written to {}".format(new_unlabeled_filename))
+ update = np.delete(update, successful_updates, axis=0)
+ update = np.insert(update.astype(str), 0, header, axis=0)
+ # Save files
+ if new_training_filename != '' :
+ write_updated_file(new_training_filename, training)
+ gcore.message("New training file written to {}".format(new_training_filename))
+ if new_unlabeled_filename != '':
+ write_updated_file(new_unlabeled_filename, unlabeled)
+ gcore.message("New unlabeled file written to {}".format(new_unlabeled_filename))
+
def write_updated_file(file_path, data) :
- """
- Write to disk some csv data. Add '_updated' at the end of the filename
- :param filename: location where the file will be saved
- :param data: data to save
+ """
+ Write to disk some csv data. Add '_updated' at the end of the filename
+ :param filename: location where the file will be saved
+ :param data: data to save
- :type file_path: string
- :type data: ndarray
- """
-
- np.savetxt(file_path, data, delimiter=",",fmt="%s")
+ :type file_path: string
+ :type data: ndarray
+ """
+ np.savetxt(file_path, data, delimiter=",",fmt="%s")
+
def linear_scale(data) :
- """
- Linearly scale values : 5th percentile to 0 and 95th percentile to 1
+ """
+ Linearly scale values : 5th percentile to 0 and 95th percentile to 1
- :param data: Features
- :type data: ndarray(#samples x #features)
+ :param data: Features
+ :type data: ndarray(#samples x #features)
- :return: Linearly scaled data
- :rtype: ndarray(#samples x #features)
- """
- p5 = np.percentile(data, 5, axis=0, interpolation='nearest')[np.newaxis] # 5th percentiles as a 2D array (-> newaxis)
- p95 = np.percentile(data, 95, axis=0, interpolation='nearest')[np.newaxis] # 95th percentiles as a 2D array (-> newaxis)
-
- return (data-p5)/(p95-p5)
+ :return: Linearly scaled data
+ :rtype: ndarray(#samples x #features)
+ """
+ p5 = np.percentile(data, 5, axis=0, interpolation='nearest')[np.newaxis] # 5th percentiles as a 2D array (-> newaxis)
+ p95 = np.percentile(data, 95, axis=0, interpolation='nearest')[np.newaxis] # 95th percentiles as a 2D array (-> newaxis)
+ return (data-p5)/(p95-p5)
+
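A quick worked example of this scaling (values below the 5th or above the 95th percentile fall outside [0, 1]):

import numpy as np

data = np.arange(1, 21, dtype=float).reshape(20, 1)  # one feature, values 1..20
p5 = np.percentile(data, 5, axis=0, interpolation='nearest')[np.newaxis]    # [[2.]]
p95 = np.percentile(data, 95, axis=0, interpolation='nearest')[np.newaxis]  # [[19.]]
scaled = (data - p5) / (p95 - p5)
print(scaled[0], scaled[-1])  # [-0.05882353] [1.05882353]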
def train(X, y, c_svm, gamma_parameter) :
- """
- Train a SVM classifier.
+ """
+ Train a SVM classifier.
- :param c: Penalty parameter C of the error term.
- :param gamma: Kernel coefficient
- :param X: Features of the training samples
- :param y: Labels of the training samples
+ :param c: Penalty parameter C of the error term.
+ :param gamma: Kernel coefficient
+ :param X: Features of the training samples
+ :param y: Labels of the training samples
- :return: Returns the trained classifier
- :rtype: sklearn.svm.SVC
- """
- classifier = svm.SVC(kernel='rbf', C=c_svm, gamma=gamma_parameter, probability=False,decision_function_shape='ovr', random_state=1938475632)
- classifier.fit(X, y)
+ :return: Returns the trained classifier
+ :rtype: sklearn.svm.SVC
+ """
+ classifier = svm.SVC(kernel='rbf', C=c_svm, gamma=gamma_parameter, probability=False,decision_function_shape='ovr', random_state=1938475632)
+ classifier.fit(X, y)
- return classifier
+ return classifier
def active_diversity_sample_selection(X_unlabled, nbr, classifier) :
- """
- Select a number of samples to label based on uncertainety and diversity
+ """
+ Select a number of samples to label based on uncertainty and diversity
- :param X_unlabeled: Pool of unlabeled samples
- :param nbr: Number of samples to select from the pool
- :param classifier: Used to predict the class of each sample
+ :param X_unlabeled: Pool of unlabeled samples
+ :param nbr: Number of samples to select from the pool
+ :param classifier: Used to predict the class of each sample
- :type X_unlabeled: ndarray(#samples x #features)
- :type nbr: int
- :type classifier: sklearn.svm.SVC
+ :type X_unlabeled: ndarray(#samples x #features)
+ :type nbr: int
+ :type classifier: sklearn.svm.SVC
- :return: Indexes of selected samples
- :rtype: ndarray
- """
-
- batch_size = nbr_uncertainty # Number of samples to select with the uncertainty criterion
+ :return: Indexes of selected samples
+ :rtype: ndarray
+ """
- uncertain_samples_index = uncertainty_filter(X_unlabled, batch_size, classifier) # Take twice as many samples as needed
- uncertain_samples = X_unlabled[uncertain_samples_index]
-
- return diversity_filter(uncertain_samples, uncertain_samples_index, nbr, diversity_lambda)
+ batch_size = nbr_uncertainty # Number of samples to select with the uncertainty criterion
-def uncertainty_filter(samples, nbr, classifier) :
- """
- Keep only a few samples based on an uncertainty criterion
- Return the indexes of samples to keep
+ uncertain_samples_index = uncertainty_filter(X_unlabled, batch_size, classifier) # Preselect nbr_uncertainty samples on the uncertainty criterion
+ uncertain_samples = X_unlabled[uncertain_samples_index]
- :param samples: Pool of unlabeled samples to select from
- :param nbr: number of samples to select from the pool
- :param classifier: Used to predict the class of each sample
+ return diversity_filter(uncertain_samples, uncertain_samples_index, nbr, diversity_lambda)
- :type X_unlabeled: ndarray(#samples x #features)
- :type nbr: int
- :type classifier: sklearn.svm.SVC
+def uncertainty_filter(samples, nbr, classifier) :
+ """
+ Keep only a few samples based on an uncertainty criterion
+ Return the indexes of samples to keep
- :return: Indexes of selected samples
- :rtype: ndarray
- """
- NBR_NEW_SAMPLE = nbr
- decision_function = np.absolute(classifier.decision_function(samples))
+ :param samples: Pool of unlabeled samples to select from
+ :param nbr: number of samples to select from the pool
+ :param classifier: Used to predict the class of each sample
- # Check if the number of samples to return is not
- # bigger than the total number of samples
- if (nbr >= samples.shape[0]) :
- NBR_NEW_SAMPLE = samples.shape[0] - 1
-
+ :type X_unlabeled: ndarray(#samples x #features)
+ :type nbr: int
+ :type classifier: sklearn.svm.SVC
- # Get the max distance to each class hyperplane for each example
- max_index = np.argmax(decision_function[:,:], axis=1)
- max_values = decision_function[np.arange(len(decision_function)), max_index]
+ :return: Indexes of selected samples
+ :rtype: ndarray
+ """
+ NBR_NEW_SAMPLE = nbr
+ decision_function = np.absolute(classifier.decision_function(samples))
- # Make the max values very small.
- # The max value is now the second best
- decision_function[np.arange(len(decision_function)), max_index] = np.NINF
-
- # Get the second max distance to each class to hyperplane for each example
- second_max_index = np.argmax(decision_function[:,:], axis=1)
- second_max_values = decision_function[np.arange(len(decision_function)), second_max_index]
+ # Check if the number of samples to return is not
+ # bigger than the total number of samples
+ if (nbr >= samples.shape[0]) :
+ NBR_NEW_SAMPLE = samples.shape[0] - 1
- # "Functionnal margin" for multiclass classifiers for each sample
- f_MC = max_values - second_max_values
-
- selected_sample_index = np.argpartition(f_MC, NBR_NEW_SAMPLE)[:NBR_NEW_SAMPLE]
+ # Get the max distance to each class hyperplane for each example
+ max_index = np.argmax(decision_function[:,:], axis=1)
+ max_values = decision_function[np.arange(len(decision_function)), max_index]
- return selected_sample_index
+ # Make the max values very small.
+ # The max value is now the second best
+ decision_function[np.arange(len(decision_function)), max_index] = np.NINF
+ # Get the second max distance to each class to hyperplane for each example
+ second_max_index = np.argmax(decision_function[:,:], axis=1)
+ second_max_values = decision_function[np.arange(len(decision_function)), second_max_index]
+
+ # "Functionnal margin" for multiclass classifiers for each sample
+ f_MC = max_values - second_max_values
+
+
+ selected_sample_index = np.argpartition(f_MC, NBR_NEW_SAMPLE)[:NBR_NEW_SAMPLE]
+
+ return selected_sample_index
+
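The filter above is best-versus-second-best margin sampling: for each sample it takes the two largest absolute one-vs-rest decision values and keeps the samples where the gap between them is smallest. A standalone sketch on synthetic data (all names are local to this example):

import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
X = rng.randn(60, 2) + np.repeat(np.array([[0, 0], [3, 0], [0, 3]]), 20, axis=0)
y = np.repeat([0, 1, 2], 20)

clf = svm.SVC(kernel='rbf', decision_function_shape='ovr').fit(X, y)
df = np.absolute(clf.decision_function(X))

part = np.partition(df, -2, axis=1)      # two largest values per row
margin = part[:, -1] - part[:, -2]       # best minus second best
most_uncertain = np.argsort(margin)[:5]  # 5 smallest margins
print(most_uncertain)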
def diversity_filter(samples, uncertain_samples_index, nbr, diversity_lambda=0.25) :
- """
- Keep only 'nbr' samples based on a diversity criterion (bruzzone2009 : Active Learning For Classification Of Remote Sensing Images)
- Return the indexes of samples to keep
+ """
+ Keep only 'nbr' samples based on a diversity criterion (bruzzone2009 : Active Learning For Classification Of Remote Sensing Images)
+ Return the indexes of samples to keep
- :param samples: Pool of unlabeled samples
- :param uncertain_samples: Indexes of uncertain samples in the arry of samples
- :param nbr: number of samples to select from the pool
- :param diversity_lambda: Heuristic parameter, between 0 and 1. Weight between the average distance to other samples and the distance to the closest sample. (default=0.25)
+ :param samples: Pool of unlabeled samples
+ :param uncertain_samples_index: Indexes of uncertain samples in the array of samples
+ :param nbr: number of samples to select from the pool
+ :param diversity_lambda: Heuristic parameter, between 0 and 1. Weight between the average distance to other samples and the distance to the closest sample. (default=0.25)
- :type X_unlabeled: ndarray(#samples x #features)
- :type uncertain_samples_index: ndarray(#uncertain_samples)
- :type nbr: int
- :type diversity_lambda: float
+ :type X_unlabeled: ndarray(#samples x #features)
+ :type uncertain_samples_index: ndarray(#uncertain_samples)
+ :type nbr: int
+ :type diversity_lambda: float
- :return: Indexes of selected samples
- :rtype: ndarray
- """
- L = diversity_lambda
- m = samples.shape[0] # Number of samples
- samples_cpy = np.empty(samples.shape)
- samples_cpy[:] = samples
+ :return: Indexes of selected samples
+ :rtype: ndarray
+ """
+ L = diversity_lambda
+ m = samples.shape[0] # Number of samples
+ samples_cpy = np.empty(samples.shape)
+ samples_cpy[:] = samples
- selected_sample_index = uncertain_samples_index # At the begining, take all samples
+ selected_sample_index = uncertain_samples_index # At the beginning, take all samples
- while (selected_sample_index.shape[0] > nbr) :
+ while (selected_sample_index.shape[0] > nbr) :
- dist_to_closest = distance_to_closest(samples_cpy)
- average_dist = average_distance(samples_cpy)
- discard = np.argmax(L*dist_to_closest + (1-L) * (1./m) * average_dist)
- selected_sample_index = np.delete(selected_sample_index, discard) # Remove the sample to discard
- samples_cpy = np.delete(samples_cpy, discard, axis=0)
-
- return selected_sample_index
+ dist_to_closest = distance_to_closest(samples_cpy)
+ average_dist = average_distance(samples_cpy)
+ discard = np.argmax(L*dist_to_closest + (1-L) * (1./m) * average_dist)
+ selected_sample_index = np.delete(selected_sample_index, discard) # Remove the sample to discard
+ samples_cpy = np.delete(samples_cpy, discard, axis=0)
+ return selected_sample_index
+
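Condensed, the diversity filter greedily discards the most redundant sample (weighting similarity to the closest sample against average similarity) until only nbr remain; a compact standalone version under the same assumptions as the addon code:

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

def greedy_diversity(samples, keep, lam=0.25):
    idx = np.arange(len(samples))
    S = samples.copy()
    m = len(S)                        # fixed at the start, as above
    while len(idx) > keep:
        K = rbf_kernel(S, S)          # similarities, 1.0 on the diagonal
        avg = (K.sum(axis=1) - 1) / (len(S) - 1)
        np.fill_diagonal(K, np.NINF)
        closest = K.max(axis=0)
        drop = np.argmax(lam * closest + (1 - lam) * avg / m)
        idx = np.delete(idx, drop)
        S = np.delete(S, drop, axis=0)
    return idx

print(greedy_diversity(np.random.RandomState(1).randn(8, 2), keep=3))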
def distance_to_closest(samples) :
- """
- For each sample, computes the distance to its closest neighbour
+ """
+ For each sample, computes the distance to its closest neighbour
- :param samples: Samples to consider
- :type samples: ndarray(#samples x #features)
+ :param samples: Samples to consider
+ :type samples: ndarray(#samples x #features)
- :return: For each sample, the distance to its closest neighbour
- :rtype: ndarray(#samples)
- """
- dist_with_samples = rbf_kernel(samples, samples) # Distance between each samples (symetric matrix)
- np.fill_diagonal(dist_with_samples, np.NINF) # Do not take into acount the distance between a sample and itself (values on the diagonal)
- dist_with_closest = dist_with_samples.max(axis=0) # For each sample, the distance to the closest other sample
-
- return dist_with_closest
+ :return: For each sample, the distance to its closest neighbour
+ :rtype: ndarray(#samples)
+ """
+ dist_with_samples = rbf_kernel(samples, samples) # Distance between each pair of samples (symmetric matrix)
+ np.fill_diagonal(dist_with_samples, np.NINF) # Do not take into account the distance between a sample and itself (values on the diagonal)
+ dist_with_closest = dist_with_samples.max(axis=0) # For each sample, the distance to the closest other sample
+ return dist_with_closest
+
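One nuance worth noting: rbf_kernel() returns a similarity in (0, 1], not a metric distance, so once the diagonal is masked the row maximum really does pick out the closest neighbour. A tiny check:

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

samples = np.array([[0.0], [0.1], [5.0]])
K = rbf_kernel(samples, samples)  # 1.0 on the diagonal
np.fill_diagonal(K, np.NINF)
print(K.max(axis=0))  # high for the two nearby samples, near 0 for the isolated one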
def average_distance(samples) :
- """
- For each sample, computes the average distance to all other samples
+ """
+ For each sample, computes the average distance to all other samples
- :param samples: Samples to consider
- :type samples: ndarray(#samples x #features)
+ :param samples: Samples to consider
+ :type samples: ndarray(#samples x #features)
- :return: For each sample, the average distance to all other samples
- :rtype: ndarray(#samples)
- """
- samples = np.asarray(samples)
- nbr_samples = samples.shape[0]
- dist_with_samples = rbf_kernel(samples, samples)
- average_dist = (dist_with_samples.sum(axis=1) - 1)/(nbr_samples-1) # Remove dist to itself (=1)
-
- return average_dist
+ :return: For each sample, the average distance to all other samples
+ :rtype: ndarray(#samples)
+ """
+ samples = np.asarray(samples)
+ nbr_samples = samples.shape[0]
+ dist_with_samples = rbf_kernel(samples, samples)
+ average_dist = (dist_with_samples.sum(axis=1) - 1)/(nbr_samples-1) # Remove dist to itself (=1)
+ return average_dist
+
def learning(X_train, y_train, X_test, y_test, X_unlabeled, ID_unlabeled, steps, sample_selection) :
- """
- Train a SVM classifier with the training data, compute the score of the classifier based on testing data and
- make a class prediction for each sample in the unlabeled data.
- Find the best samples to label that would increase the most the classification score
+ """
+ Train a SVM classifier with the training data, compute the score of the classifier based on testing data and
+ make a class prediction for each sample in the unlabeled data.
+ Find the best samples to label that would increase the most the classification score
- :param X_train: Features of training samples
- :param y_train: Labels of training samples
- :param X_test: Features of test samples
- :param y_test: Labels of test samples
- :param X_unlabeled: Features of unlabeled samples
- :param ID_unlabeled: IDs of unlabeled samples
- :param steps: Number of samples to label
- :param sample_selection: Function used to select the samples to label (different heuristics)
+ :param X_train: Features of training samples
+ :param y_train: Labels of training samples
+ :param X_test: Features of test samples
+ :param y_test: Labels of test samples
+ :param X_unlabeled: Features of unlabeled samples
+ :param ID_unlabeled: IDs of unlabeled samples
+ :param steps: Number of samples to label
+ :param sample_selection: Function used to select the samples to label (different heuristics)
- :type X_train: ndarray(#samples x #features)
- :type y_train: ndarray(#samples)
- :type X_test: ndarray(#samples x #features)
- :type y_test: ndarray(#samples)
- :type X_unlabeled: ndarray(#samples x #features)
- :type ID_unlabeled: ndarray(#samples)
- :type steps: int
- :type samples_selection: callable
+ :type X_train: ndarray(#samples x #features)
+ :type y_train: ndarray(#samples)
+ :type X_test: ndarray(#samples x #features)
+ :type y_test: ndarray(#samples)
+ :type X_unlabeled: ndarray(#samples x #features)
+ :type ID_unlabeled: ndarray(#samples)
+ :type steps: int
+ :type samples_selection: callable
- :return: The IDs of samples to label, the score of the classifier and the prediction for all unlabeled samples
- :rtype indexes: ndarray(#steps)
- :rtype score: float
- :rtype predictions: ndarray(#unlabeled_samples)
- """
+ :return: The IDs of samples to label, the score of the classifier and the prediction for all unlabeled samples
+ :rtype indexes: ndarray(#steps)
+ :rtype score: float
+ :rtype predictions: ndarray(#unlabeled_samples)
+ """
- if(X_unlabeled.size == 0) :
- raise Exception("Pool of unlabeled samples empty")
+ if(X_unlabeled.size == 0) :
+ raise Exception("Pool of unlabeled samples empty")
- c_svm, gamma_parameter = SVM_parameters(options['c_svm'], options['gamma_parameter'], X_train, y_train, search_iter)
- gcore.message('Parameters used : C={}, gamma={}, lambda={}'.format(c_svm, gamma_parameter, diversity_lambda))
+ c_svm, gamma_parameter = SVM_parameters(options['c_svm'], options['gamma_parameter'], X_train, y_train, search_iter)
+ gcore.message('Parameters used : C={}, gamma={}, lambda={}'.format(c_svm, gamma_parameter, diversity_lambda))
- classifier = train(X_train, y_train, c_svm, gamma_parameter)
- score = classifier.score(X_test, y_test)
+ classifier = train(X_train, y_train, c_svm, gamma_parameter)
+ score = classifier.score(X_test, y_test)
- predictions = classifier.predict(X_unlabeled)
-
- samples_to_label = sample_selection(X_unlabeled, steps, classifier)
+ predictions = classifier.predict(X_unlabeled)
- return ID_unlabeled[samples_to_label], score, predictions
+ samples_to_label = sample_selection(X_unlabeled, steps, classifier)
+ return ID_unlabeled[samples_to_label], score, predictions
+
def SVM_parameters(c, gamma, X_train, y_train, n_iter) :
- """
- Determine the parameters (C and gamma) for the SVM classifier.
- If a parameter is specified in the parameters, keep this value.
- If it is not specified, compute the 'best' value by grid search (cross validation set)
+ """
+ Determine the parameters (C and gamma) for the SVM classifier.
+ If a parameter is specified in the parameters, keep this value.
+ If it is not specified, compute the 'best' value by grid search (cross validation set)
- :param c: Penalty parameter C of the error term.
- :param gamma: Kernel coefficient
- :param X_train: Features of the training samples
- :param y_train: Labels of the training samples
- :param n_iter: Number of parameter settings that are sampled. n_iter trades off runtime vs quality of the solution.
+ :param c: Penalty parameter C of the error term.
+ :param gamma: Kernel coefficient
+ :param X_train: Features of the training samples
+ :param y_train: Labels of the training samples
+ :param n_iter: Number of parameter settings that are sampled. n_iter trades off runtime vs quality of the solution.
- :type c: string
- :type gamma: string
- :type X_train: ndarray
- :type Y_train: ndarray
- :type n_iter: int
+ :type c: string
+ :type gamma: string
+ :type X_train: ndarray
+ :type Y_train: ndarray
+ :type n_iter: int
- :return: The c and gamma parameters
- :rtype: floats
- """
+ :return: The c and gamma parameters
+ :rtype: floats
+ """
- parameters = {}
- if c == '' or gamma == '':
- parameters = {'C': scipy.stats.expon(scale=100), 'gamma': scipy.stats.expon(scale=.1),
- 'kernel': ['rbf'], 'class_weight':['balanced', None]}
-
- if parameters != {} :
- svr = svm.SVC()
- clf = RandomizedSearchCV(svr, parameters, n_iter=n_iter, n_jobs=-1, verbose=0)
- clf.fit(X_train, y_train)
+ parameters = {}
+ if c == '' or gamma == '':
+ parameters = {'C': scipy.stats.expon(scale=100), 'gamma': scipy.stats.expon(scale=.1),
+ 'kernel': ['rbf'], 'class_weight':['balanced', None]}
- if c == '' :
- c = clf.best_params_['C']
- if gamma == '' :
- gamma = clf.best_params_['gamma']
- return float(c), float(gamma)
+ if parameters != {} :
+ svr = svm.SVC()
+ clf = RandomizedSearchCV(svr, parameters, n_iter=n_iter, n_jobs=-1, verbose=0)
+ clf.fit(X_train, y_train)
+ if c == '' :
+ c = clf.best_params_['C']
+ if gamma == '' :
+ gamma = clf.best_params_['gamma']
+ return float(c), float(gamma)
+
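The search above draws C and gamma from exponential distributions; a minimal standalone run on toy data (the dataset and n_iter are illustrative):

import scipy.stats
from sklearn import svm, datasets
from sklearn.model_selection import RandomizedSearchCV

X, y = datasets.make_classification(n_samples=100, n_features=4, random_state=0)
params = {'C': scipy.stats.expon(scale=100),
          'gamma': scipy.stats.expon(scale=.1),
          'kernel': ['rbf'], 'class_weight': ['balanced', None]}
search = RandomizedSearchCV(svm.SVC(), params, n_iter=10, random_state=0)
search.fit(X, y)
print(search.best_params_['C'], search.best_params_['gamma'])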
def main() :
- global learning_steps
- global diversity_lambda
- global nbr_uncertainty
- global search_iter
+ global learning_steps
+ global diversity_lambda
+ global nbr_uncertainty
+ global search_iter
- global svm, preprocessing, train_test_split, RandomizedSearchCV
- global StratifiedKFold, rbf_kernel
- try :
- from sklearn import svm
- from sklearn import preprocessing
- from sklearn.model_selection import train_test_split
- from sklearn.model_selection import RandomizedSearchCV
- from sklearn.model_selection import StratifiedKFold
- from sklearn.metrics.pairwise import rbf_kernel
- except ImportError :
- gcore.fatal("This module requires the scikit-learn python package. Please install it.")
+ global svm, preprocessing, train_test_split, RandomizedSearchCV
+ global StratifiedKFold, rbf_kernel
+ try :
+ from sklearn import svm
+ from sklearn import preprocessing
+ from sklearn.model_selection import train_test_split
+ from sklearn.model_selection import RandomizedSearchCV
+ from sklearn.model_selection import StratifiedKFold
+ from sklearn.metrics.pairwise import rbf_kernel
+ except ImportError :
+ gcore.fatal("This module requires the scikit-learn python package. Please install it.")
- learning_steps = int(options['learning_steps']) if options['learning_steps'] != '0' else 5
- search_iter = int(options['search_iter']) if options['search_iter'] != '0' else 10 # Number of samples to label at each iteration
- diversity_lambda = float(options['diversity_lambda']) if options['diversity_lambda'] != '' else 0.25 # Lambda parameter used in the diversity heuristic
- nbr_uncertainty = int(options['nbr_uncertainty']) if options['nbr_uncertainty'] != '0' else 15 # Number of samples to select (based on uncertainty criterion) before applying the diversity criterion. Must be at least greater or equal to [LEARNING][steps]
-
- X_train, ID_train, y_train, header_train = load_data(options['training_set'], labeled = True)
- X_test, ID_test, y_test, header_test = load_data(options['test_set'], labeled = True)
- X_unlabeled, ID_unlabeled, y_unlabeled, header_unlabeled = load_data(options['unlabeled_set'])
-
- nbr_train = ID_train.shape[0]
+ learning_steps = int(options['learning_steps']) if options['learning_steps'] != '0' else 5
+ search_iter = int(options['search_iter']) if options['search_iter'] != '0' else 10 # Number of parameter settings sampled when tuning the SVM parameters
+ diversity_lambda = float(options['diversity_lambda']) if options['diversity_lambda'] != '' else 0.25 # Lambda parameter used in the diversity heuristic
+ nbr_uncertainty = int(options['nbr_uncertainty']) if options['nbr_uncertainty'] != '0' else 15 # Number of samples to select (based on uncertainty criterion) before applying the diversity criterion. Must be at least greater or equal to [LEARNING][steps]
- if (options['update'] !='') : # If an update file has been specified, transfer samples
- X_train, ID_train, y_train = update(options['update'], X_train, ID_train, y_train, X_unlabeled, ID_unlabeled)
- if (options['training_updated'] != '' or options['unlabeled_updated'] != '') :
- write_update(options['update'], options['training_set'], options['unlabeled_set'], options['training_updated'], options['unlabeled_updated'])
- elif (options['update'] =='' and (options['training_updated'] != '' or options['unlabeled_updated'] != '')) :
- gcore.warning('No update file specified : could not write the updated files.')
- nbr_new_train = ID_train.shape[0]
+ X_train, ID_train, y_train, header_train = load_data(options['training_set'], labeled = True)
+ X_test, ID_test, y_test, header_test = load_data(options['test_set'], labeled = True)
+ X_unlabeled, ID_unlabeled, y_unlabeled, header_unlabeled = load_data(options['unlabeled_set'])
- samples_to_label_IDs, score, predictions = learning(X_train, y_train, X_test, y_test, X_unlabeled, ID_unlabeled, learning_steps, active_diversity_sample_selection)
-
- X_unlabeled, ID_unlabeled, y_unlabeled, header_unlabeled = load_data(options['unlabeled_set'], scale=False) # Load unscaled data
+ nbr_train = ID_train.shape[0]
- predictions_file = options['predictions']
- if (predictions_file != '') : # Write the class prediction only if an output file has been specified by the user
- write_result_file(ID_unlabeled, X_unlabeled, predictions, header_unlabeled, predictions_file)
- gcore.message("Class predictions written to {}".format(predictions_file))
+ if (options['update'] !='') : # If an update file has been specified, transfer samples
+ X_train, ID_train, y_train = update(options['update'], X_train, ID_train, y_train, X_unlabeled, ID_unlabeled)
+ if (options['training_updated'] != '' or options['unlabeled_updated'] != '') :
+ write_update(options['update'], options['training_set'], options['unlabeled_set'], options['training_updated'], options['unlabeled_updated'])
+ elif (options['update'] =='' and (options['training_updated'] != '' or options['unlabeled_updated'] != '')) :
+ gcore.warning('No update file specified : could not write the updated files.')
+ nbr_new_train = ID_train.shape[0]
+ samples_to_label_IDs, score, predictions = learning(X_train, y_train, X_test, y_test, X_unlabeled, ID_unlabeled, learning_steps, active_diversity_sample_selection)
- gcore.message('Training set : {}'.format(X_train.shape[0]))
- gcore.message('Test set : {}'.format(X_test.shape[0]))
- gcore.message('Unlabeled set : {}'.format(X_unlabeled.shape[0] - (nbr_new_train - nbr_train)))
- gcore.message('Score : {}'.format(score))
+ X_unlabeled, ID_unlabeled, y_unlabeled, header_unlabeled = load_data(options['unlabeled_set'], scale=False) # Load unscaled data
- for ID in samples_to_label_IDs :
- print(int(ID))
+ predictions_file = options['predictions']
+ if (predictions_file != '') : # Write the class prediction only if an output file has been specified by the user
+ write_result_file(ID_unlabeled, X_unlabeled, predictions, header_unlabeled, predictions_file)
+ gcore.message("Class predictions written to {}".format(predictions_file))
+ gcore.message('Training set : {}'.format(X_train.shape[0]))
+ gcore.message('Test set : {}'.format(X_test.shape[0]))
+ gcore.message('Unlabeled set : {}'.format(X_unlabeled.shape[0] - (nbr_new_train - nbr_train)))
+ gcore.message('Score : {}'.format(score))
+
+ for ID in samples_to_label_IDs :
+ print(int(ID))
+
+
if __name__ == '__main__' :
- options, flags = grass.script.parser()
- main()
+ options, flags = grass.script.parser()
+ main()
Modified: grass-addons/grass7/raster/r.object.spatialautocor/r.object.spatialautocor.py
===================================================================
--- grass-addons/grass7/raster/r.object.spatialautocor/r.object.spatialautocor.py 2019-01-17 20:42:28 UTC (rev 73966)
+++ grass-addons/grass7/raster/r.object.spatialautocor/r.object.spatialautocor.py 2019-01-17 20:53:33 UTC (rev 73967)
@@ -102,48 +102,48 @@
""" Calculate either Moran's I or Geary's C for values of the given raster """
raster_vars = gscript.parse_command('r.univar',
- map_=raster,
- flags='g',
- quiet=True)
+ map_=raster,
+ flags='g',
+ quiet=True)
global_mean = float(raster_vars['mean'])
univar_res = gscript.read_command('r.univar',
- flags='t',
- map_=raster,
- zones=mapname,
- out='-',
- sep='comma',
- quiet=True)
+ flags='t',
+ map_=raster,
+ zones=mapname,
+ out='-',
+ sep='comma',
+ quiet=True)
means = {}
mean_diffs = {}
firstline = True
for line in univar_res.splitlines():
- l = line.split(',')
- if firstline:
- i = l.index('mean')
- firstline = False
- else:
- means[l[0]] = float(l[i])
- mean_diffs[l[0]] = float(l[i]) - global_mean
+ l = line.split(',')
+ if firstline:
+ i = l.index('mean')
+ firstline = False
+ else:
+ means[l[0]] = float(l[i])
+ mean_diffs[l[0]] = float(l[i]) - global_mean
sum_sq_mean_diffs = sum(x**2 for x in mean_diffs.values())
total_nb_neighbors = 0
for region in neighbordict:
- total_nb_neighbors += len(neighbordict[region])
+ total_nb_neighbors += len(neighbordict[region])
N = len(means)
sum_products = 0
sum_squared_differences = 0
for region in neighbordict:
- region_value = means[region] - global_mean
- neighbors = neighbordict[region]
- nb_neighbors = len(neighbors)
- for neighbor in neighbors:
- neighbor_value = means[neighbor] - global_mean
- sum_products += region_value * neighbor_value
- sum_squared_differences = ( means[region] - means[neighbor] ) ** 2
+ region_value = means[region] - global_mean
+ neighbors = neighbordict[region]
+ nb_neighbors = len(neighbors)
+ for neighbor in neighbors:
+ neighbor_value = means[neighbor] - global_mean
+ sum_products += region_value * neighbor_value
+ sum_squared_differences += ( means[region] - means[neighbor] ) ** 2
if method == 'moran':
autocor = ( ( float(N) / total_nb_neighbors ) * (float(sum_products) / sum_sq_mean_diffs ) )
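For orientation, with binary weights this is Moran's I = (N/W) * sum_ij (x_i - xbar)(x_j - xbar) / sum_i (x_i - xbar)^2, where W is the total neighbour count. A tiny worked example with a hypothetical three-region neighbour dict:

means = {'a': 1.0, 'b': 2.0, 'c': 6.0}
neighbordict = {'a': ['b'], 'b': ['a', 'c'], 'c': ['b']}

global_mean = sum(means.values()) / len(means)   # 3.0
W = sum(len(v) for v in neighbordict.values())   # 4
num = sum((means[i] - global_mean) * (means[j] - global_mean)
          for i in neighbordict for j in neighbordict[i])
den = sum((x - global_mean) ** 2 for x in means.values())
print((len(means) / float(W)) * (num / den))     # -3/28, mild negative autocorrelation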
Modified: grass-addons/grass7/raster/r.to.vect.tiled/r.to.vect.tiled.py
===================================================================
--- grass-addons/grass7/raster/r.to.vect.tiled/r.to.vect.tiled.py 2019-01-17 20:42:28 UTC (rev 73966)
+++ grass-addons/grass7/raster/r.to.vect.tiled/r.to.vect.tiled.py 2019-01-17 20:53:33 UTC (rev 73967)
@@ -117,31 +117,31 @@
rtvflags=""
for key in 'sbtvz':
- if flags[key]:
- rtvflags += key
+ if flags[key]:
+ rtvflags += key
# check options
if xtiles <= 0:
- grass.fatal(_("Number of tiles in x direction must be > 0"))
+ grass.fatal(_("Number of tiles in x direction must be > 0"))
if ytiles < 0:
- grass.fatal(_("Number of tiles in y direction must be > 0"))
+ grass.fatal(_("Number of tiles in y direction must be > 0"))
if grass.find_file(name = input)['name'] == '':
- grass.fatal(_("Input raster %s not found") % input)
+ grass.fatal(_("Input raster %s not found") % input)
grass.use_temp_region()
curr = grass.region()
width = int(curr['cols'] / xtiles)
if width <= 1:
- grass.fatal("The requested number of tiles in x direction is too large")
+ grass.fatal("The requested number of tiles in x direction is too large")
height = int(curr['rows'] / ytiles)
if height <= 1:
- grass.fatal("The requested number of tiles in y direction is too large")
+ grass.fatal("The requested number of tiles in y direction is too large")
do_clip = False
overlap = 0
if flags['s'] and ftype == 'area':
- do_clip = True
- overlap = 2
+ do_clip = True
+ overlap = 2
ewres = curr['ewres']
nsres = curr['nsres']
@@ -153,11 +153,11 @@
e = curr['e']
w = curr['w'] + xoverlap
if w >= e:
- grass.fatal(_("Overlap is too large"))
+ grass.fatal(_("Overlap is too large"))
n = curr['n'] - yoverlap
s = curr['s']
if s >= n:
- grass.fatal(_("Overlap is too large"))
+ grass.fatal(_("Overlap is too large"))
datatype = grass.raster_info(input)['datatype']
vtiles = None
@@ -164,88 +164,88 @@
# north to south
for ytile in range(ytiles):
- n = curr['n'] - ytile * height * nsres
- s = n - height * nsres - yoverlap
- if ytile == ytiles - 1:
- s = curr['s']
- # west to east
- for xtile in range(xtiles):
- w = curr['w'] + xtile * width * ewres
- e = w + width * ewres + xoverlap
+ n = curr['n'] - ytile * height * nsres
+ s = n - height * nsres - yoverlap
+ if ytile == ytiles - 1:
+ s = curr['s']
+ # west to east
+ for xtile in range(xtiles):
+ w = curr['w'] + xtile * width * ewres
+ e = w + width * ewres + xoverlap
- if xtile == xtiles - 1:
- e = curr['e']
+ if xtile == xtiles - 1:
+ e = curr['e']
- grass.run_command('g.region', n = n, s = s, e = e, w = w, nsres = nsres, ewres = ewres)
-
- if do_clip:
- tilename = output + '_stile_' + str(ytile) + str(xtile)
- else:
- tilename = output + '_tile_' + str(ytile) + str(xtile)
+ grass.run_command('g.region', n = n, s = s, e = e, w = w, nsres = nsres, ewres = ewres)
- outname = output + '_tile_' + str(ytile) + str(xtile)
+ if do_clip:
+ tilename = output + '_stile_' + str(ytile) + str(xtile)
+ else:
+ tilename = output + '_tile_' + str(ytile) + str(xtile)
- grass.run_command('r.to.vect', input = input, output = tilename,
- type = ftype, column = column, flags = rtvflags)
+ outname = output + '_tile_' + str(ytile) + str(xtile)
- if do_clip:
- n2 = curr['n'] - ytile * height * nsres - yoverlap2
- s2 = n2 - height * nsres
- if ytile == 0:
- n2 = curr['n']
- s2 = n2 - height * nsres - yoverlap2
- if ytile == ytiles - 1:
- s2 = curr['s']
+ grass.run_command('r.to.vect', input = input, output = tilename,
+ type = ftype, column = column, flags = rtvflags)
- w2 = curr['w'] + xtile * width * ewres + xoverlap2
- e2 = w2 + width * ewres
- if xtile == 0:
- w2 = curr['w']
- e2 = w2 + width * ewres + xoverlap2
- if xtile == xtiles - 1:
- e2 = curr['e']
+ if do_clip:
+ n2 = curr['n'] - ytile * height * nsres - yoverlap2
+ s2 = n2 - height * nsres
+ if ytile == 0:
+ n2 = curr['n']
+ s2 = n2 - height * nsres - yoverlap2
+ if ytile == ytiles - 1:
+ s2 = curr['s']
- tilename = output + '_stile_' + str(ytile) + str(xtile)
- if grass.vector_info_topo(tilename)['areas'] > 0:
- grass.run_command('g.region', n = n2, s = s2, e = e2, w = w2,
- nsres = nsres, ewres = ewres)
-
- extname = 'extent_tile_' + str(ytile) + str(xtile)
- grass.run_command('v.in.region', output = extname, flags = 'd')
- outname = output + '_tile_' + str(ytile) + str(xtile)
- grass.run_command('v.overlay', ainput = tilename, binput = extname,
- output = outname, operator = 'and', olayer = '0,1,0')
- grass.run_command('g.remove', flags='f', type='vector', name= extname, quiet = True)
+ w2 = curr['w'] + xtile * width * ewres + xoverlap2
+ e2 = w2 + width * ewres
+ if xtile == 0:
+ w2 = curr['w']
+ e2 = w2 + width * ewres + xoverlap2
+ if xtile == xtiles - 1:
+ e2 = curr['e']
- if vtiles is None:
- vtiles = outname
- else:
- vtiles = vtiles + ',' + outname
+ tilename = output + '_stile_' + str(ytile) + str(xtile)
+ if grass.vector_info_topo(tilename)['areas'] > 0:
+ grass.run_command('g.region', n = n2, s = s2, e = e2, w = w2,
+ nsres = nsres, ewres = ewres)
- grass.run_command('g.remove', flags='f', type='vector', name= tilename, quiet = True)
+ extname = 'extent_tile_' + str(ytile) + str(xtile)
+ grass.run_command('v.in.region', output = extname, flags = 'd')
+ outname = output + '_tile_' + str(ytile) + str(xtile)
+ grass.run_command('v.overlay', ainput = tilename, binput = extname,
+ output = outname, operator = 'and', olayer = '0,1,0')
+ grass.run_command('g.remove', flags='f', type='vector', name= extname, quiet = True)
- else:
- # write cmd history:
- grass.vector_history(outname)
- if vtiles is None:
- vtiles = outname
- else:
- vtiles = vtiles + ',' + outname
+ if vtiles is None:
+ vtiles = outname
+ else:
+ vtiles = vtiles + ',' + outname
+ grass.run_command('g.remove', flags='f', type='vector', name= tilename, quiet = True)
+
+ else:
+ # write cmd history:
+ grass.vector_history(outname)
+ if vtiles is None:
+ vtiles = outname
+ else:
+ vtiles = vtiles + ',' + outname
+
if flags['p']:
- grass.run_command('v.patch', input = vtiles, output = output,
- flags = 'e')
+ grass.run_command('v.patch', input = vtiles, output = output,
+ flags = 'e')
grass.run_command('g.remove', flags='f', type='vector', name= vtiles, quiet = True)
-
- if grass.vector_info_topo(output)['boundaries'] > 0:
- outpatch = output + '_patch'
- grass.run_command('g.rename', vector = (output,outpatch))
- grass.run_command('v.clean', input = outpatch, output = output,
- tool = 'break', flags = 'c')
- grass.run_command('g.remove', flags='f', type='vector', name= outpatch)
+ if grass.vector_info_topo(output)['boundaries'] > 0:
+ outpatch = output + '_patch'
+ grass.run_command('g.rename', vector = (output,outpatch))
+ grass.run_command('v.clean', input = outpatch, output = output,
+ tool = 'break', flags = 'c')
+ grass.run_command('g.remove', flags='f', type='vector', name= outpatch)
+
grass.message(_("%s complete") % 'r.to.vect.tiled')
return 0
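For context, the tiling arithmetic in this module splits the current region into xtiles x ytiles blocks, padding all but the last tile in each direction with an overlap; a standalone sketch of the bounds computation (region values are illustrative):

curr = {'n': 10000.0, 's': 0.0, 'w': 0.0, 'e': 10000.0,
        'rows': 1000, 'cols': 1000, 'nsres': 10.0, 'ewres': 10.0}
xtiles, ytiles, overlap = 2, 2, 2

width = int(curr['cols'] / xtiles)
height = int(curr['rows'] / ytiles)
xoverlap = overlap * curr['ewres']
yoverlap = overlap * curr['nsres']

for ytile in range(ytiles):
    n = curr['n'] - ytile * height * curr['nsres']
    s = curr['s'] if ytile == ytiles - 1 else n - height * curr['nsres'] - yoverlap
    for xtile in range(xtiles):
        w = curr['w'] + xtile * width * curr['ewres']
        e = curr['e'] if xtile == xtiles - 1 else w + width * curr['ewres'] + xoverlap
        print('tile %d%d: n=%g s=%g w=%g e=%g' % (ytile, xtile, n, s, w, e))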