[GRASS-SVN] r66993 - grass-addons/grass7/raster/r.series.filter
svn_grass at osgeo.org
svn_grass at osgeo.org
Mon Nov 30 15:46:18 PST 2015
Author: DmitryKolesov
Date: 2015-11-30 15:46:18 -0800 (Mon, 30 Nov 2015)
New Revision: 66993
Modified:
grass-addons/grass7/raster/r.series.filter/r.series.filter.html
grass-addons/grass7/raster/r.series.filter/r.series.filter.py
Log:
r.series.filter: procedure for parameter optimization
Modified: grass-addons/grass7/raster/r.series.filter/r.series.filter.html
===================================================================
--- grass-addons/grass7/raster/r.series.filter/r.series.filter.html 2015-11-30 21:20:33 UTC (rev 66992)
+++ grass-addons/grass7/raster/r.series.filter/r.series.filter.html 2015-11-30 23:46:18 UTC (rev 66993)
@@ -1,21 +1,53 @@
<h2>DESCRIPTION</h2>
-
+<em>-c</em> Find optimal parameters of used filter. The function to optimize depends on
+ difference between original and filtered signals and on derivates of the
+ filtered signal.
+<p>
<em>r.series.filter</em> is a module to filter raster time series <em>X</em>
in time domain.
<p>
<em>input</em> Raster names of equally spaced time series <em>X</em>.
<p>
<em>result_prefix</em> Prefix for raster names of filterd <em>X</em>.
+<p>
<em>method</em> filtering method. Savitzky-Golay filter is only implemented now.
+<p>
<em>winsize</em> The length of the filter window. <em>winsize</em> must be a positive odd integer.
-<em>order</em> The order of the polynomial used to fit the samples. <em>order</em> must be less than <em>winsize</em>.
+<p>
+<em>order</em> The order of the polynomial used to fit the samples. <em>order</em> must be less than <em>winsize</em>.
+<p>
+<em>opt_points</em> If <em>-c</em> is specifed, then random sample
+ <em>opt_points</em> and use them in parameter optimization.
+<p>
+<em>diff_penalty</em> Penalty for difference between original and filtered signals (see Notes).
+<p>
+<em>deriv_penalty</em> Penalty for derivates of filtered signal (see Notes).
+
<h2>NOTES</h2>
<em>X</em> must be equally spaced time series. If the series isn't equally
spaced, insert NULL raster maps into <em>X</em>.
+<p>
+There is a procedure for searching for good filtering parameters: it uses <em>opt_points</em>
+random points and perfoms filtering in that points. The result of the filtering can be tested
+for quality. The quality function is a tradeof two features: accuracy and smoothing.
+Accuracy can be estimated as the (abs) difference between original and filtered data,
+quality of smoothing can be estimated as absalute values of the derivates. So there are two
+parameters <em>diff_penalty</em> and <em>deriv_penalty</em> that can ajust the tradeof.
+<p>
+So the optimizing procedure performs loop over filtering parameters and calculates
+ thequality function
+<div class="code"><pre>
+penalty =quality diff_penalty * sum(abs(Xi-Fi)) + sum(abs(dFi))
+</pre></div>
+where <em>Xi</em> are original signals in the samplig points, <em>Fi</em>
+are filtered signals in the sampling points.
+<p>
+Optimal parameters are used for signal filtering in the whole region.
+
<h2>EXAMPLES</h2>
Create test data: <em>X = sin(t) + E</em>,
where <em>X</em> is raster time series, <em>E</em> is a error term.
Modified: grass-addons/grass7/raster/r.series.filter/r.series.filter.py
===================================================================
--- grass-addons/grass7/raster/r.series.filter/r.series.filter.py 2015-11-30 21:20:33 UTC (rev 66992)
+++ grass-addons/grass7/raster/r.series.filter/r.series.filter.py 2015-11-30 23:46:18 UTC (rev 66993)
@@ -21,6 +21,11 @@
#% description: Perform filtering of raster time series X (in time domain)
#% overwrite: yes
#%End
+#%flag
+#% key: c
+#% description: Try to find optimal parameters for filtering
+#% guisection: Parameters
+#%end
#%option
#% key: input
#% type: string
@@ -62,9 +67,35 @@
#% multiple: no
#% description: Order of the Savitzky-Golay filter
#%end
+#%option
+#% key: opt_points
+#% type: integer
+#% answer: 50
+#% required: no
+#% multiple: no
+#% description: Count of random points used for parameter optimization
+#%end
+#%option
+#% key: diff_penalty
+#% type: double
+#% answer: 1.0
+#% required: no
+#% multiple: no
+#% description: Penalty for difference between original and filtered signals
+#%end
+#%option
+#% key: deriv_penalty
+#% type: double
+#% answer: 1.0
+#% required: no
+#% multiple: no
+#% description: Penalty for big derivates of the filtered signal
+#%end
+
+
import os
import sys
@@ -121,7 +152,7 @@
_, cols = row_data.shape
for i in range(cols):
arr = _fill_nulls(row_data[:, i])
- arr = savgol_filter(arr, winsize, order)
+ arr = savgol_filter(arr, winsize, order, mode='nearest')
result[:, i] = arr
return result
@@ -139,6 +170,68 @@
return arr
+def fitting_quality(input_data, fitted_data, diff_penalty=1.0, deriv_penalty=1.0):
+ """Returns penalty for fitted curves:
+ :param input_data: 2d array of samples
+ :param fitted_data: result of the fitting
+ :param diff_penalty: penalty for difference between original data and fitted data
+ :param deriv_penalty: penalty for derivations of the fitted data
+ """
+ difference = sum(abs(input_data.flatten() - fitted_data.flatten()))
+ deriv_diff = sum(abs(np.diff(fitted_data, axis=0).flatten()))
+
+ return diff_penalty * difference + deriv_penalty * deriv_diff
+
+
+def optimize_params(names, npoints, diff_penalty, deriv_penalty):
+ """Perform crossvalidation:
+ take 'npoints' random points,
+ find winsize and order that minimize the quality function
+ """
+
+ reg = Region()
+ map_count = len(names)
+ input_data = np.empty((map_count, npoints), dtype=float)
+ inputs = init_rasters(names)
+ # Select sample data points
+ try:
+ open_rasters(inputs)
+ i = attempt = 0
+ while i < npoints:
+ row = np.random.randint(reg.rows)
+ col = np.random.randint(reg.cols)
+ for map_num in range(len(inputs)):
+ map = inputs[map_num]
+ input_data[map_num, i] = get_val_or_nan(map, row=row, col=col)
+ if not all(np.isnan(input_data[:, i])):
+ i += 1
+ attempt = 0
+ else:
+ attempt += 1
+ grass.warning('Selected point contains NULL values in all input maps. Performing of selection another point.')
+ if attempt >= npoints:
+ grass.fatal("Can't find points with non NULL data.")
+
+ finally:
+ close_rasters(inputs)
+
+ # Find the optima
+ best = np.inf
+ best_winsize = best_order = None
+ for winsize in range(5, map_count/2, 2):
+ for order in range(2, min(winsize - 2, 10)): # 10 is a 'magic' number: we don't want very hight polynomyal fitting usually
+ for i in range(npoints):
+ test_data = np.copy(input_data)
+ test_data = _filter(test_data, winsize, order)
+ penalty = fitting_quality(input_data, test_data,
+ diff_penalty, deriv_penalty)
+ if penalty < best:
+ best = penalty
+ best_winsize, best_order = winsize, order
+
+ return best_winsize, best_order
+
+
def filter(names, winsize, order, prefix):
inputs = init_rasters(names)
output_names = [prefix + name for name in names]
@@ -150,7 +243,7 @@
reg = Region()
for i in range(reg.rows):
# import ipdb; ipdb.set_trace()
- row_data = np.array([_get_val_or_nan(r, i) for r in inputs])
+ row_data = np.array([_get_row_or_nan(r, i) for r in inputs])
filtered_rows = _filter(row_data, winsize, order)
for map_num in range(len(outputs)):
map = outputs[map_num]
@@ -161,7 +254,18 @@
close_rasters(outputs)
close_rasters(inputs)
-def _get_val_or_nan(raster, row_num):
+
+def get_val_or_nan(map, row, col):
+ """
+ Return map value of the cell or FNULL (if the cell is null)
+ """
+ value = map.get(row, col)
+ if map.mtype == "CELL" and value == CNULL:
+ value = FNULL
+ return value
+
+
+def _get_row_or_nan(raster, row_num):
row = raster.get_row(row_num)
if raster.mtype != 'CELL':
return row
@@ -171,6 +275,9 @@
return row
def main(options, flags):
+
+ optimize = flags['c']
+
xnames = options['input']
xnames = xnames.split(',')
@@ -180,6 +287,16 @@
order = options['order']
order = int(order)
+ opt_points = options['opt_points']
+ opt_points = int(opt_points)
+
+ diff_penalty = options['diff_penalty']
+ diff_penalty = float(diff_penalty)
+
+ deriv_penalty = options['deriv_penalty']
+ deriv_penalty = float(deriv_penalty)
+
+
res_prefix = options['result_prefix']
N = len(xnames)
@@ -195,6 +312,13 @@
if order >= winsize:
grass.error("Order of the filter must be less than window length")
+ if optimize:
+ winsize, order = optimize_params(xnames, opt_points,
+ diff_penalty, deriv_penalty)
+ if winsize is None:
+ grass.error("Optimization procedure doesn't convergence.")
+ sys.exit(1)
+
filter(xnames, winsize, order, res_prefix)
if __name__ == "__main__":
More information about the grass-commit
mailing list