[GRASS-SVN] r71626 - grass-addons/grass7/imagery/i.segment.stats
svn_grass at osgeo.org
svn_grass at osgeo.org
Thu Nov 2 06:27:22 PDT 2017
Author: mlennert
Date: 2017-11-02 06:27:22 -0700 (Thu, 02 Nov 2017)
New Revision: 71626
Modified:
grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html
grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py
Log:
i.segment.stats: adding option to calculate raster stats in parallel (fixes #3427)
Modified: grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html
===================================================================
--- grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html 2017-11-02 13:14:12 UTC (rev 71625)
+++ grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html 2017-11-02 13:27:22 UTC (rev 71626)
@@ -45,7 +45,13 @@
can be very time-consuming. In that case, it might be easier to only work
with the <b>csvfile</b> output.
+<p>
+When several raster maps are given as input, the calculation of their
+per-segment statistics can be parallelized by setting the <b>processes</b>
+parameter to the desired number of parallel processes. At most one process
+is used per raster map.
+
<h2>EXAMPLE</h2>
<div class="code"><pre>
@@ -53,7 +59,8 @@
g.region rast=lsat7_2002_80 -p
i.segment group=landsat_pan output=ls_pan_seg01 threshold=0.1 memory=4000 minsize=50
i.segment.stats map=ls_pan_seg01 csvfile=segstats.csv vectormap=ls_pan_seg01 \
- rasters=lsat7_2002_10,lsat7_2002_20,lsat7_2002_30,lsat7_2002_40,lsat7_2002_50,lsat7_2002_70
+ rasters=lsat7_2002_10,lsat7_2002_20,lsat7_2002_30,lsat7_2002_40,lsat7_2002_50,lsat7_2002_70 \
+ processes=4
</pre></div>
<h2>SEE ALSO</h2>
Modified: grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py
===================================================================
--- grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py 2017-11-02 13:14:12 UTC (rev 71625)
+++ grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py 2017-11-02 13:27:22 UTC (rev 71626)
@@ -72,7 +72,7 @@
#%option
#% key: processes
#% type: integer
-#% description: Number of processes to run in parallel
+#% description: Number of processes to run in parallel (for multiple rasters)
#% required: no
#% answer: 1
#%end
@@ -91,11 +91,13 @@
import os
+import glob
import atexit
import collections
import math
import grass.script as gscript
-
+from functools import partial
+from multiprocessing import Pool
def cleanup():
@@ -109,7 +111,27 @@
if stats_temp_file:
os.remove(stats_temp_file)
+ if rasters:
+ for tempfile in glob.glob(stats_temp_file + ".*"):
+ os.remove(tempfile)
+def worker(segment_map, stat_temp_file, raster):
+
+ rastername = raster.split('@')[0]
+ rastername = rastername.replace('.', '_')
+ temp_file = stat_temp_file + '.' + rastername
+ if not gscript.find_file(raster, element='cell')['name']:
+ gscript.message(_("Cannot find raster %s" % raster))
+ return
+ gscript.run_command('r.univar',
+ map_=raster,
+ zones=segment_map,
+ output=temp_file,
+ flags='et',
+ overwrite=True,
+ quiet=True)
+
+
def main():
global insert_sql
@@ -122,6 +144,7 @@
segment_map = options['map']
csvfile = options['csvfile'] if options['csvfile'] else []
vectormap = options['vectormap'] if options['vectormap'] else []
+ global rasters
rasters = options['rasters'].split(',') if options['rasters'] else []
area_measures = options['area_measures'].split(',') if (options['area_measures'] and not flags['s']) else []
if area_measures:
@@ -133,6 +156,7 @@
raster_statistics = options['raster_statistics'].split(',') if options['raster_statistics'] else []
separator = gscript.separator(options['separator'])
+ processes = int(options['processes'])
output_header = ['cat']
output_dict = collections.defaultdict(list)
@@ -171,35 +195,34 @@
output_dict[values[0]] = [values[x] for x in stat_indices]
if rasters:
+ gscript.message(_("Calculating statistics for raster maps..."))
+ if len(rasters) < processes:
+ processes = len(rasters)
+ gscript.message(_("Only one process per raster. Reduced number of processes to %i." % processes))
stat_indices = [raster_stat_dict[x] for x in raster_statistics]
- for raster in rasters:
- gscript.message(_("Calculating statistics for raster map <%s>..." % raster))
- if not gscript.find_file(raster, element='cell')['name']:
- gscript.message(_("Cannot find raster %s" % raster))
- continue
- rastername = raster.split('@')[0]
- rastername = rastername.replace('.', '_')
- output_header += [rastername + "_" + x for x in raster_statistics]
- gscript.run_command('r.univar',
- map_=raster,
- zones=segment_map,
- output=stats_temp_file,
- flags='et',
- overwrite=True,
- quiet=True)
+ pool = Pool(processes)
+ func = partial(worker, segment_map, stats_temp_file)
+ pool.map(func, rasters)
+ pool.close()
+ pool.join()
- firstline = True
- with open(stats_temp_file, 'r') as fin:
- for line in fin:
- if firstline:
- firstline = False
- continue
- values = line.rstrip().split('|')
- values = line.rstrip().split('|')
- if area_measures:
- output_dict[values[0]] = output_dict[values[0]]+ [values[x] for x in stat_indices]
- else:
- output_dict[values[0]] = [values[x] for x in stat_indices]
+ for raster in rasters:
+ rastername = raster.split('@')[0]
+ rastername = rastername.replace('.', '_')
+ temp_file = stats_temp_file + '.' + rastername
+ output_header += [rastername + "_" + x for x in raster_statistics]
+ firstline = True
+ with open(temp_file, 'r') as fin:
+ for line in fin:
+ if firstline:
+ firstline = False
+ continue
+ values = line.rstrip().split('|')
+ values = line.rstrip().split('|')
+ if area_measures:
+ output_dict[values[0]] = output_dict[values[0]]+ [values[x] for x in stat_indices]
+ else:
+ output_dict[values[0]] = [values[x] for x in stat_indices]
message = _("Some values could not be calculated for the objects below. ")
message += _("These objects are thus not included in the results. ")
More information about the grass-commit
mailing list