[GRASS-SVN] r63988 - grass-addons/grass7/raster/r.vif

svn_grass at osgeo.org svn_grass at osgeo.org
Wed Jan 7 15:08:45 PST 2015


Author: pvanbosgeo
Date: 2015-01-07 15:08:45 -0800 (Wed, 07 Jan 2015)
New Revision: 63988

Added:
   grass-addons/grass7/raster/r.vif/r.vif.py
Removed:
   grass-addons/grass7/raster/r.vif/r.vif
Modified:
   grass-addons/grass7/raster/r.vif/r.vif.html
Log:
Rewritten shell script as python script + added functionality (reiterative computation of VIF till user-defined threshold is reached)

Deleted: grass-addons/grass7/raster/r.vif/r.vif
===================================================================
--- grass-addons/grass7/raster/r.vif/r.vif	2015-01-07 23:06:24 UTC (rev 63987)
+++ grass-addons/grass7/raster/r.vif/r.vif	2015-01-07 23:08:45 UTC (rev 63988)
@@ -1,204 +0,0 @@
-#!/bin/sh
-# 
-#set -x
-########################################################################
-# 
-# MODULE:       r.vif
-# AUTHOR(S):    Paulo van Breugel <p.vanbreugel AT gmail.com>
-# PURPOSE:      Calculate the variance inflaction factor of set of
-#				variables. Alternatively, to speed the calculation up, 
-#               this can be done for an user defined number of random 
-#               cells.
-# Dependency:	r.regression.multi
-#
-# COPYRIGHT: (C) 2013-2014 Paulo van Breugel
-#            http://ecodiv.org
-#            http://pvanb.wordpress.com/
-# 
-#            This program is free software under the GNU General Public 
-#            License (>=v2). Read the file COPYING that comes with GRASS 
-#            for details. 
-# 
-########################################################################
-#
-#%Module 
-#% description: Calculate the variance inflaction factor
-#%End 
-
-#%option
-#% key: maps
-#% type: string
-#% gisprompt: old,cell,raster
-#% description: variables
-#% key_desc: name
-#% required: yes
-#% multiple: yes
-#%end
-
-#%option
-#% key: output_file
-#% type: string
-#% gisprompt: new
-#% description: Name of output text file
-#% key_desc: name
-#% required: no
-#%end
-
-#%option
-#% key: nsp
-#% type: string
-#% gisprompt: new
-#% description: random sample size
-#% key_desc: number or percentage
-#% required: no
-#%end
-
-#=======================================================================
-## GRASS team recommandations
-#=======================================================================
-
-## Check if in GRASS
-if  [ -z "$GISBASE" ] ; then
-    echo "You must be in GRASS GIS to run this program." 1>&2
-    exit 1
-fi
-
-## check for awk
-if [ ! -x "$(which awk)" ] ; then
-    g.message -e "<awk> required, please install <awk> or <gawk> first"
-    exit 1
-fi
-
-## To parse the code into interactive menu
-if [ "$1" != "@ARGS_PARSED@" ] ; then
-    exec g.parser "$0" "$@"
-fi
-
-## set environment so that awk works properly in all languages ##
-unset LC_ALL
-export LC_NUMERIC=C
-
-## what to do in case of user break:
-exitprocedure()
-{
-    echo "User break!"
-    cleanup
-    exit 1
-}
-
-## shell check for user break (signal list: trap -l)
-trap "exitprocedure" 2 3 15
-
-#=======================================================================
-## function
-#=======================================================================
-
-cleanup()
-{
-   g.remove -f type=rast pattern="rnicheOVERLAP$$_*" --quiet
-   g.remove -f type=rast pattern="rnicheEQUIV$$_*" --quiet
-}
-trap "cleanup" 2 3 15
-
-
-#=======================================================================
-## Check if input maps exist
-#=======================================================================
-
-OPF=${GIS_OPT_OUTPUT_FILE}
-MAPS=${GIS_OPT_MAPS}
-TEMP1=`g.tempfile pid=$$`
-
-# test for missing input raster maps
-oIFS=$IFS
-IFS=,
-for nvar in $MAPS ; do
-    tstIN=`echo $nvar | awk 'BEGIN{FS="@"}{print $1}'`
-    g.findfile element=cell file=${tstIN} > /dev/null
-    if [ $? -gt 0 ] ; then 
-        g.message -e 'The map '${tstIN}' is missing'
-    exit 1
-    fi
-done
-IFS=$oIFS
-unset tstIN
-
-#=======================================================================
-## Test if text file exists. If so, append _v1 to file name
-#=======================================================================
-
-k=1
-OPFN=$OPF
-while [ -f "$OPF" ]; do
-   a1=`echo $OPFN | awk -F. '{print $1}'`
-   a2=`echo $OPFN | awk -F. '{print $2}'`
-   OPF=${a1}_v${k}.${a2}
-   k=$((k + 1))
-done
-if [ $k -gt 1 ]; then
-   g.message -w "There is already a file $OPFN
-   Will use ${OPF} instead"
-fi
-
-#=======================================================================
-## Calculate VIF and write to standard output (& optionally to file)
-#=======================================================================
-
-# List with the input maps and first input map
-MAPS=`echo $GIS_OPT_MAPS | sed 's;,; ;g'`
-MAP1=`echo $MAPS | awk '{ print $1 }'`
-eval `g.gisenv`
-
-# Write text file with column headers
-if [ -n "$OPF" ]; then 
-    echo "variable,vif,sqrtvif" > "$OPF"
-fi
-
-if [ ! -z "$GIS_OPT_NSP" ]; then
-    # Copy mask as backup if it exists
-    eval `g.findfile element=cell file=MASK mapset=$MAPSET`
-    remove_mask=1
-    if [ $file ] ; then 
-        g.copy raster=MASK,mask_r_vif_backup
-        mask_exists=1
-        unset file
-    fi
-    r.random input=$MAP1 n=$GIS_OPT_NSP raster_output=TMP_R_VIF_98765
-    r.mask --overwrite raster=TMP_R_VIF_98765
-fi
-
-# Calculate VIF for each variable
-MAPS=`echo $GIS_OPT_MAPS | sed 's;,; ;g'`
-for nvar in ${MAPS} ; do
-	expl=`echo $MAPS | eval sed '-e s/$nvar/''/g'`
-	expl=`echo $expl | sed 's; ;,;g'`
-	eval `r.regression.multi -g --quiet mapx=$expl mapy=$nvar`
-	vif=`echo "1/(1-$Rsq)" | bc -l`
-	sqrtvif=`echo "sqrt($vif)" | bc -l`
-	
-	# Write to std output
-	echo "VIF for $nvar = "$vif
-	echo "sqrt(VIF) for $nvar = "$sqrtvif
-	
-	# Write to user-defined txt file
-	if [ -n "$OPF" ] ; then 
-        echo "$nvar,$vif,$sqrtvif" >> "$OPF"
-    fi
-done
-
-if [ $remove_mask ]; then
-    g.remove -f type=rast name=MASK@${MAPSET},TMP_R_VIF_98765
-    unset remove_mask
-fi
-
-if [ $mask_exists ]; then
-    r.mask raster=mask_r_vif_backup
-    g.remove -f type=rast name=mask_r_vif_backup
-    unset mask_exist
-fi
-
-
-
-
-
-

Modified: grass-addons/grass7/raster/r.vif/r.vif.html
===================================================================
--- grass-addons/grass7/raster/r.vif/r.vif.html	2015-01-07 23:06:24 UTC (rev 63987)
+++ grass-addons/grass7/raster/r.vif/r.vif.html	2015-01-07 23:08:45 UTC (rev 63988)
@@ -2,15 +2,17 @@
 
 <em>r.vif</em> This module will compute variance inflaction factor (VIF) and the square root of the VIF. The VIF quantifies how much the variance (the square of the estimate's standard deviation) of an estimated regression coefficient is increased because of collinearity. The square root of VIF is a measure of how much larger the standard error is, compared with what it would be if that variable were uncorrelated with the other predictor variables in the model. 
 
-<p>The user has the option to compute the VIF over a random subset of the raster cells. Like in the r.random module (which is used to select the random set of raster cells), the user may specify the quantity of random cells either as a positive integer (e.g., 10), or as a percentage of the raster map layer's cells (e.g., 10%, or 3.05%). Percentages less than one percent may be stated as decimals. 
+<p>By default the VIF is calculated for each variable. If the user sets a VIF threshold value (maxvif) the VIF will calculated again after removing the variable with the highest VIF. This will be repeated till the VIF is smaller than maxvif.
 
+<p>The user has the option to compute the VIF over a random subset of the raster cells (to speed up computation). Like in the r.random module (which is used to select the random set of raster cells), the user may specify the quantity of random cells either as a positive integer (e.g., 10), or as a percentage of the raster map layer's cells (e.g., 10%, or 3.05%). Percentages less than one percent may be stated as decimals. 
 
+
 <h2>Notes</h2>
 
-<p> This add-on doesn't work in GRASS 6.* as it depends on r.regression.multi, which is available in GRASS 7 main only. 
+<p> This add-on doesn't work in GRASS 6.* as it depends on r.regression.multi, which is available in GRASS 7+ main only. 
 
 <h2>AUTHOR</h2>
 
 Paulo van Breugel, paulo at ecodiv.org
 
-<i>Last changed: $Date$</i>
+<p><i>Last changed: $Date$</i>

Added: grass-addons/grass7/raster/r.vif/r.vif.py
===================================================================
--- grass-addons/grass7/raster/r.vif/r.vif.py	                        (rev 0)
+++ grass-addons/grass7/raster/r.vif/r.vif.py	2015-01-07 23:08:45 UTC (rev 63988)
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+########################################################################
+#
+# MODULE:       r.vif
+# AUTHOR(S):    Paulo van Breugel <p.vanbreugel AT gmail.com>
+# PURPOSE:      Calculate the variance inflaction factor of set of
+#				variables. Alternatively, to speed the calculation up,
+#               this can be done for an user defined number of random
+#               cells. The user can set a maximum VIF, in wich case the VIF
+#               will calculated again after removing the variables with the
+#               highest VIF. This will be repeated till the VIF falls below
+#               the user defined VIF threshold value.
+# TODO          Include the option to force the function to retain one or more
+#               variables
+# Dependency:	r.regression.multi
+#
+# COPYRIGHT: (C) 2014 Paulo van Breugel
+#            http://ecodiv.org
+#            http://pvanb.wordpress.com/
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+#
+########################################################################
+#
+#%Module
+#% description: Calculate the variance inflaction factor
+#%End
+
+#%option
+#% key: maps
+#% type: string
+#% gisprompt: old,cell,raster
+#% description: variables
+#% key_desc: name
+#% required: yes
+#% multiple: yes
+#%end
+
+#%option
+#% key: output
+#% type: string
+#% gisprompt: new
+#% description: Name of output text file
+#% key_desc: name
+#% required: no
+#%end
+
+#%option
+#% key: maxvif
+#% type: string
+#% description: Maximum vif
+#% key_desc: string
+#%end
+
+#%option
+#% key: nsp
+#% type: string
+#% gisprompt: new
+#% description: random sample size
+#% key_desc: number or percentage
+#% required: no
+#%end
+
+# Test purposes
+#options = {"maps":"bio1,bio5,bio6", "output":"bb", "nsp":"100", "maxvif":"100"}
+#flags = {"m":True, "k":True, "n":False, "i":True, "k":True}
+
+#=======================================================================
+## General
+#=======================================================================
+
+# import libraries
+import os
+import sys
+import uuid
+import atexit
+import math
+import tempfile
+import string
+import numpy as np
+import grass.script as grass
+
+# Check if running in GRASS
+if not os.environ.has_key("GISBASE"):
+    grass.message( "You must be in GRASS GIS to run this program." )
+    sys.exit(1)
+
+# create set to store names of temporary maps to be deleted upon exit
+clean_rast = set()
+
+def cleanup():
+    for rast in clean_rast:
+        grass.run_command("g.remove",
+        type="rast", name = rast, quiet = True)
+
+# main function
+def main():
+
+    # Variables
+    IPF = options['maps']
+    IPF = IPF.split(',')
+    IPFn = [i.split('@')[0] for i in IPF]
+    OPF = options['output']
+    if OPF == '':
+        OPF = tempfile.mkstemp()[1]
+    NSP = options['nsp']
+    MXVIF = float(options['maxvif'])
+
+    # Test if text file exists. If so, append _v1 to file name
+    k = 0
+    OPFold = OPF[:]
+    while os.path.isfile(OPF):
+        k = k + 1
+        opft = OPF.split('.')
+        if len(opft) == 1:
+            OPF = opft[0] + "_" + str(k)
+        else:
+            OPF = opft[0] + "_v" + str(k) + "." + opft[1]
+    if k > 0:
+        grass.info("there is already a file " + OPFold + ".")
+        grass.info("Using " + OPF + "_v" + str(k) + " instead")
+
+#=======================================================================
+## Calculate VIF and write to standard output (& optionally to file)
+#=======================================================================
+
+    # Create mask if nsp is set replace existing MASK if exist)
+    citiam = grass.find_file('MASK', element = 'cell')
+    if NSP != '':
+        if citiam['fullname'] != '':
+            tmpf0 = "rvif_mask_" + str(uuid.uuid4())
+            tmpf0 = string.replace(tmpf0, '-', '_')
+            grass.run_command("g.copy", quiet=True, raster=("MASK", tmpf0))
+            clean_rast.add(tmpf0)
+        tmpf1 = "rvif_" + str(uuid.uuid4())
+        tmpf1 = string.replace(tmpf1, '-', '_')
+        grass.run_command("r.random", quiet=True, input=IPF[0], n=NSP, raster=tmpf1)
+        grass.run_command("r.mask", quiet=True, overwrite=True, raster=tmpf1)
+        clean_rast.add(tmpf1)
+
+    # Open text file for writing and write heading
+    text_file = open(OPF, "w")
+
+    # Calculate VIF and write results to text file
+    rvifmx = MXVIF + 1
+    IPF2 = IPF[:]
+    IPFn2 = IPFn[:]
+    while MXVIF < rvifmx:
+        rvif = np.zeros(len(IPF2))
+        text_file.write("variable\tvif\tsqrtvif\n")
+        for k in xrange(len(IPF2)):
+            MAPy = IPF2[k]
+            nMAPy = IPFn2[k]
+            MAPx = IPF2[:]
+            del MAPx[k]
+            vifstat = grass.read_command("r.regression.multi",
+                               flags="g", quiet=True,
+                               mapx=MAPx, mapy=MAPy)
+            vifstat = vifstat.split('\n')
+            vifstat = [i.split('=') for i in vifstat]
+            vif = 1 / (1 - float(vifstat[1][1]))
+            sqrtvif = math.sqrt(vif)
+            text_file.write(nMAPy + "\t" + str(round(vif,3)) + "\t" + str(round(sqrtvif,3)) + "\n")
+            rvif[k] = vif
+
+        rvifmx = max(rvif)
+        if rvifmx >= MXVIF:
+            rvifindex = np.argmax(rvif, axis=None)
+            varremove = IPF2[rvifindex]
+            text_file.write("\n")
+            text_file.write("Removing " + varremove)
+            text_file.write("\n---------------------------\n")
+            text_file.write("\n")
+            del IPF2[rvifindex]
+            del IPFn2[rvifindex]
+        else:
+            text_file.write("\n\n")
+            text_file.write("Final selection (above) has maximum VIF = " + str(round(rvifmx, 3)))
+    text_file.close()
+
+    # Recover original mask
+    if NSP != '':
+        grass.run_command("r.mask", flags="r", quiet=True)
+        if citiam['fullname'] != '':
+            grass.mapcalc("MASK = $tmpf0",tmpf0 = tmpf0, quiet=True)
+            grass.run_command("g.remove", quiet=True, flags="f", type="raster", name=tmpf0)
+        grass.run_command("g.remove", quiet=True, flags="f", type="raster", name=tmpf1)
+
+    # Write to std output
+    grass.info("selected variables are: ")
+    grass.info(', '.join(IPFn2))
+    grass.info("with as maximum VIF: " + str(rvifmx))
+    grass.info("Full statistics are in " + OPF)
+
+if __name__ == "__main__":
+    options, flags = grass.parser()
+    atexit.register(cleanup)
+    sys.exit(main())
+
+
+
+
+
+


Property changes on: grass-addons/grass7/raster/r.vif/r.vif.py
___________________________________________________________________
Added: svn:executable
   + *
Added: svn:mime-type
   + text/x-python
Added: svn:eol-style
   + native



More information about the grass-commit mailing list