[GRASS-SVN] r68622 - grass-addons/grass7/vector/v.class.mlR

Mon Jun 6 06:56:40 PDT 2016

Author: mlennert
Date: 2016-06-06 06:56:40 -0700 (Mon, 06 Jun 2016)
New Revision: 68622

Modified:
   grass-addons/grass7/vector/v.class.mlR/v.class.mlR.html
   grass-addons/grass7/vector/v.class.mlR/v.class.mlR.py
Log:
v.class.mlR: added option to write model details to file, added error handling to subprocess call


Modified: grass-addons/grass7/vector/v.class.mlR/v.class.mlR.html
===================================================================

--- grass-addons/grass7/vector/v.class.mlR/v.class.mlR.html	2016-06-06 10:20:26 UTC (rev 68621)
+++ grass-addons/grass7/vector/v.class.mlR/v.class.mlR.html	2016-06-06 13:56:40 UTC (rev 68622)
@@ -49,12 +49,16 @@
 estimation of the probability of the classification after weighted vote, 
 based on equation (2) in Moreno et al (2006), page 709.
 
-<p>Optional output of the module include a box-and-whisker plot indicating
-the resampling variance based on the cross-validation for each classifier 
-(<em>bw_plot_file</em>) and a csv file containing accuracy measures (overall
-accuracy and kappa) for each classifier (<em>accuracy_file</em>). The user
-can also chose to write the R script constructed and used internally to a text
-file for study or further modification.
+<p>Optional outputs of the module include detailed information about the 
+different classifier models and their cross-validation results 
+(<em>model_details</em>; for details of these results see the train, 
+resamples and confusionMatrix.train functions in the caret package), a 
+box-and-whisker plot indicating the resampling variance based on the 
+cross-validation for each classifier (<em>bw_plot_file</em>) and a csv 
+file containing accuracy measures (overall accuracy and kappa) for each 
+classifier (<em>accuracy_file</em>). The user can also choose to write the 
+R script constructed and used internally to a text file for study or further 
+modification.
 
 <h2>NOTES</h2>
 
@@ -72,8 +76,9 @@
 
 <ul>
 	<li>Add automagic installation of missing R packages.</li>
-	<li>Add output with confusion matrix
 	<li>Add option to manually define grid of tuning parameters</li>
+	<li>Check for existing file created by R as no overwrite check is 
+	    done in R</li>
 </ul>
 - 
 

Modified: grass-addons/grass7/vector/v.class.mlR/v.class.mlR.py
===================================================================
--- grass-addons/grass7/vector/v.class.mlR/v.class.mlR.py	2016-06-06 10:20:26 UTC (rev 68621)
+++ grass-addons/grass7/vector/v.class.mlR/v.class.mlR.py	2016-06-06 13:56:40 UTC (rev 68622)
@@ -127,6 +127,12 @@
 #% guisection: Optional output
 #%end
 #%option G_OPT_F_OUTPUT
+#% key: model_details
+#% description: File for saving details about the classifier module runs
+#% required: no
+#% guisection: Optional output
+#%end
+#%option G_OPT_F_OUTPUT
 #% key: bw_plot_file
 #% description: PNG file for saving box-whisker plot of classifier performance
 #% required: no
@@ -234,6 +240,10 @@
     if flags['f'] and not classification_results:
         gscript.fatal("A classification_results file is necessary for flag 'f'")
 
+    model_details = None
+    if options['model_details']:
+        model_details = options['model_details']
+
     raster_segments_map = None
     if options['raster_segments_map']:
         raster_segments_map = options['raster_segments_map']
@@ -290,8 +300,6 @@
     r_file.write("\n")
     r_file.write('require(caret)')
     r_file.write("\n")
-    r_file.write("cat('\\nRunning R...\\n')")
-    r_file.write("\n")
     r_file.write('features <- read.csv("%s", sep="|", header=TRUE, row.names=1)' % feature_vars)
     r_file.write("\n")
     r_file.write('training <- read.csv("%s", sep="|", header=TRUE, row.names=1)' % training_vars)
@@ -380,6 +388,42 @@
         r_file.write("\n")
         r_file.write("write.csv(df_means, '%s', row.names=FALSE, quote=FALSE)" % accuracy_file)
         r_file.write("\n")
+    if model_details:
+        r_file.write("conf.mat.cv <- lapply(models.cv, function(x) confusionMatrix(x))")
+        r_file.write("\n")
+        r_file.write("sink('%s')" % model_details)
+        r_file.write("\n")
+        r_file.write("cat('BEST TUNING VALUES\n')")
+        r_file.write("\n")
+        r_file.write("cat('******************************\n\n')")
+        r_file.write("\n")
+        r_file.write("lapply(models.cv, function(x) x$best)")
+        r_file.write("\n")
+        r_file.write("cat('\n')")
+        r_file.write("\n")
+        r_file.write("cat('\nSUMMARY OF RESAMPLING RESULTS\n')")
+        r_file.write("\n")
+        r_file.write("cat('******************************\n\n')")
+        r_file.write("\n")
+        r_file.write("summary(resamps.cv)")
+        r_file.write("\n")
+        r_file.write("cat('\n')")
+        r_file.write("\n")
+        r_file.write("cat('\nRESAMPLED CONFUSION MATRICES\n')")
+        r_file.write("\n")
+        r_file.write("cat('******************************\n\n')")
+        r_file.write("\n")
+        r_file.write("print(conf.mat.cv)")
+        r_file.write("\n")
+        r_file.write("cat('\nDETAILED CV RESULTS\n')")
+        r_file.write("\n")
+        r_file.write("cat('******************************\n\n')")
+        r_file.write("\n")
+        r_file.write("lapply(models.cv, function(x) x$results)")
+        r_file.write("\n")
+        r_file.write("sink()")
+        r_file.write("\n")
+
     if bw_plot_file:
         r_file.write("png('%s.png')" % bw_plot_file)
         r_file.write("\n")
@@ -391,8 +435,14 @@
     if r_script_file:
         shutil.copy(r_commands, r_script_file)
 
-    subprocess.call(['Rscript', r_commands], stdout=open(os.devnull, 'wb'))
+    gscript.message("Running R now. Following output is R output.")
+    try:
+        subprocess.check_call(['Rscript', r_commands], stderr=subprocess.STDOUT, )
+    except subprocess.CalledProcessError:
+        gscript.fatal("There was an error in the execution of the R script.\nPlease check the R output.")
 
+    gscript.message("Finished running R.")
+
     if allmap and not flags['f']:
 
         model_output_desc = model_output + 't'