[GRASS-SVN] r71626 - grass-addons/grass7/imagery/i.segment.stats

Thu Nov 2 06:27:22 PDT 2017

Author: mlennert
Date: 2017-11-02 06:27:22 -0700 (Thu, 02 Nov 2017)
New Revision: 71626

Modified:
   grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html
   grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py
Log:
i.segment.stats: adding option to calculate raster stats in parallel (fixes #3427)

Modified: grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html
===================================================================

--- grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html	2017-11-02 13:14:12 UTC (rev 71625)
+++ grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.html	2017-11-02 13:27:22 UTC (rev 71626)
@@ -45,7 +45,13 @@
 can be very time-consuming. In that case, it might be easier to only work
 with the <b>csvfile</b> output.
 
+<p>
+The processing of several raster input files for which to calculate per-segment
+statistics can be parallelized by setting the <b>processes</b> parameter to the
+desired number of parallel processes, with at most one process per raster to
+be treated.
 
+
 <h2>EXAMPLE</h2>
 
 <div class="code"><pre>
@@ -53,7 +59,8 @@
 g.region rast=lsat7_2002_80 -p
 i.segment group=landsat_pan output=ls_pan_seg01 threshold=0.1 memory=4000 minsize=50
 i.segment.stats map=ls_pan_seg01 csvfile=segstats.csv vectormap=ls_pan_seg01 \
-  rasters=lsat7_2002_10,lsat7_2002_20,lsat7_2002_30,lsat7_2002_40,lsat7_2002_50,lsat7_2002_70
+  rasters=lsat7_2002_10,lsat7_2002_20,lsat7_2002_30,lsat7_2002_40,lsat7_2002_50,lsat7_2002_70 \
+  processes=4
 </pre></div>
 
 <h2>SEE ALSO</h2>

Modified: grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py
===================================================================
--- grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py	2017-11-02 13:14:12 UTC (rev 71625)
+++ grass-addons/grass7/imagery/i.segment.stats/i.segment.stats.py	2017-11-02 13:27:22 UTC (rev 71626)
@@ -72,7 +72,7 @@
 #%option
 #% key: processes
 #% type: integer
-#% description: Number of processes to run in parallel
+#% description: Number of processes to run in parallel (for multiple rasters)
 #% required: no
 #% answer: 1
 #%end
@@ -91,11 +91,13 @@
 
 
 import os
+import glob
 import atexit
 import collections
 import math
 import grass.script as gscript
-    
+from functools import partial    
+from multiprocessing import Pool
 
 def cleanup():
 
@@ -109,7 +111,27 @@
     if stats_temp_file:
         os.remove(stats_temp_file)
 
+    if rasters:
+        for tempfile in glob.glob(stats_temp_file + ".*"):
+            os.remove(tempfile)
 
+def worker(segment_map, stat_temp_file, raster):
+
+    rastername = raster.split('@')[0]
+    rastername = rastername.replace('.', '_')
+    temp_file = stat_temp_file + '.' + rastername
+    if not gscript.find_file(raster, element='cell')['name']:
+        gscript.message(_("Cannot find raster %s" % raster))
+        return
+    gscript.run_command('r.univar',
+                        map_=raster,
+                        zones=segment_map,
+                        output=temp_file,
+                        flags='et',
+                        overwrite=True,
+                        quiet=True)
+
+
 def main():
 
     global insert_sql
@@ -122,6 +144,7 @@
     segment_map = options['map']
     csvfile = options['csvfile'] if options['csvfile'] else []
     vectormap = options['vectormap'] if options['vectormap'] else []
+    global rasters
     rasters = options['rasters'].split(',') if options['rasters'] else []
     area_measures = options['area_measures'].split(',') if (options['area_measures'] and not flags['s']) else []
     if area_measures:
@@ -133,6 +156,7 @@
 
     raster_statistics = options['raster_statistics'].split(',') if options['raster_statistics'] else []
     separator = gscript.separator(options['separator'])
+    processes = int(options['processes'])
 
     output_header = ['cat']
     output_dict = collections.defaultdict(list)
@@ -171,35 +195,34 @@
 		output_dict[values[0]] = [values[x] for x in stat_indices]
 
     if rasters:
+        gscript.message(_("Calculating statistics for raster maps..."))
+        if len(rasters) < processes:
+            processes = len(rasters)
+            gscript.message(_("Only one process per raster. Reduced number of processes to %i." % processes))
         stat_indices = [raster_stat_dict[x] for x in raster_statistics]
-    for raster in rasters:
-	gscript.message(_("Calculating statistics for raster map <%s>..." % raster))
-        if not gscript.find_file(raster, element='cell')['name']:
-            gscript.message(_("Cannot find raster %s" % raster))
-            continue
-        rastername = raster.split('@')[0]
-        rastername = rastername.replace('.', '_')
-        output_header += [rastername + "_" + x for x in raster_statistics]
-        gscript.run_command('r.univar',
-                            map_=raster,
-                            zones=segment_map,
-			    output=stats_temp_file,
-                            flags='et',
-			    overwrite=True,
-			    quiet=True)
+        pool = Pool(processes)
+        func = partial(worker, segment_map, stats_temp_file)
+        pool.map(func, rasters)
+        pool.close()
+        pool.join()
 
-	firstline = True
-    	with open(stats_temp_file, 'r') as fin:
-	    for line in fin:
-		if firstline:
-		    firstline = False
-		    continue
-		values = line.rstrip().split('|')
-		values = line.rstrip().split('|')
-	    	if area_measures:
-            	    output_dict[values[0]] = output_dict[values[0]]+ [values[x] for x in stat_indices]
-	    	else:
-            	    output_dict[values[0]] = [values[x] for x in stat_indices]
+        for raster in rasters:
+            rastername = raster.split('@')[0]
+            rastername = rastername.replace('.', '_')
+            temp_file = stats_temp_file + '.' + rastername
+            output_header += [rastername + "_" + x for x in raster_statistics]
+            firstline = True
+            with open(temp_file, 'r') as fin:
+                for line in fin:
+                    if firstline:
+                        firstline = False
+                        continue
+                    values = line.rstrip().split('|')
+                    values = line.rstrip().split('|')
+                    if area_measures:
+                        output_dict[values[0]] = output_dict[values[0]]+ [values[x] for x in stat_indices]
+                    else:
+                        output_dict[values[0]] = [values[x] for x in stat_indices]
 
     message = _("Some values could not be calculated for the objects below. ")
     message += _("These objects are thus not included in the results. ")