[GRASS-SVN] r57061 - grass-addons/grass6/raster/r.broscoe
svn_grass at osgeo.org
svn_grass at osgeo.org
Wed Jul 10 07:05:28 PDT 2013
Author: annalisapg
Date: 2013-07-10 07:05:28 -0700 (Wed, 10 Jul 2013)
New Revision: 57061
Modified:
grass-addons/grass6/raster/r.broscoe/r.broscoe
Log:
main code upgrade with the new APTDTM statistical test
Modified: grass-addons/grass6/raster/r.broscoe/r.broscoe
===================================================================
--- grass-addons/grass6/raster/r.broscoe/r.broscoe 2013-07-10 13:25:51 UTC (rev 57060)
+++ grass-addons/grass6/raster/r.broscoe/r.broscoe 2013-07-10 14:05:28 UTC (rev 57061)
@@ -1,11 +1,11 @@
#!/bin/sh
#
-############################################################################
+#####################################################################################
#
# MODULE: r.broscoe
-# AUTHOR(S): Annalisa Minelli, Ivan Marchesini
+# AUTHOR(S): Annalisa Minelli, Ivan Marchesini, Pierluigi De Rosa, Luca Scrucca
#
-# PURPOSE: Calculates waerden test and t test statistics
+# PURPOSE: Calculates APTDTM test and t test statistics
# for some values of threshold, on a single basin
# according to A. J. Broscoe theory (1959)
#
@@ -15,18 +15,15 @@
# License (>=v2). Read the file COPYING that comes with GRASS
# for details.
#
-# REQUIREMENTS: you need R installed with "agricolae" package
-# You can install the package by starting R as root and typing
-# install.packages("agricolae")
-# This will install the package from a CRAN mirror on the internet.
+# REQUIREMENTS: you need R installed with packages "stringr" and "tools".
# For more information on R see the documentation available at
# http://www.r-project.org/
#
# TODO: solve stability problems for low area threshold (cf. v.strahler)
-#############################################################################
+###################################################################################
#%Module
-#% description: Calculates waerden test and t test statistics for some values of threshold, on a single basin according to A. J. Broscoe theory (1959)
-#% keywords: waerden.test,t.test,mean stream drop
+#% description: Calculates statistics for some values of threshold, on a single basin according to A. J. Broscoe theory (1959)
+#% keywords: APTDTM test,adjusted t test,t test,mean stream drop
#%End
#%option
#% key: dem
@@ -63,9 +60,9 @@
#%option
#% key: result
#% type: string
-#% gisprompt: new_file,file,output
+#% gisprompt: new_folder,folder,output
#% key_desc: name
-#% description: Resultant text file to write statistics into
+#% description: Name for the folder in /tmp where "drop"_threshold text files are stored
#% required: yes
#%end
@@ -91,13 +88,14 @@
echo $LOCATION_NAME
-#g.region rast=$dem
-
res=`g.region -p | grep res | sed 1d | cut -f2 -d':' | tr -d ' ' `
-#g.region vect=$input
+rm -R /tmp/$result
+mkdir /tmp/$result
-#g.region n=n+$res s=s-$res e=e+$res w=w-$res
+#preparing text output header
+echo "threshold t Pr Radj Pvalue">result
+echo "threshold maxord">thrVSmaxord
for j in $thresholds
do
@@ -121,10 +119,8 @@
maxord=`cat maxord`
-rm thrVSmaxord_$j
+echo "$j $maxord" >> thrVSmaxord
-echo "$j $maxord" > thrVSmaxord_$j
-
a=1
until [ "$a" -gt "$maxord" ]
do
@@ -147,18 +143,27 @@
v.to.db map=ord1_pl_3d_cat type=line layer=1 qlayer=1 option=start units=meters 'column=sx, sy, sz'
v.to.db map=ord1_pl_3d_cat type=line layer=1 qlayer=1 option=end units=meters 'column=ex, ey, ez'
+
+ #added to prevent r.category - in v.rast.stats - errors.. align on DEM doesn't work
+ v.db.addcol map=ord1_pl_3d_cat columns="stats_min double precision"
- v.rast.stats vector=ord1_pl_3d_cat raster=$dem colprefix=stats percentile=90 --verbose -c
+ #v.rast.stats vector=ord1_pl_3d_cat raster=$dem colprefix=stats percentile=90 --verbose -c
db_driver=`db.connect -p | grep driver | cut -f2 -d':'`
db_database=`db.connect -p | grep database | cut -f2 -d':'`
for i in `db.select table=ord1_pl_3d_cat database=$db_database driver=$db_driver 'sql=select cat from ord1_pl_3d_cat' | sed 1d`
do
-
- min=`echo "select stats_min from ord1_pl_3d_cat where cat=$i" | db.select | sed 1d`
sz=`echo "select sz from ord1_pl_3d_cat where cat=$i" | db.select | sed 1d`
ez=`echo "select ez from ord1_pl_3d_cat where cat=$i" | db.select | sed 1d`
+ #added to prevent r.category - in v.rast.stats - errors.. align on DEM doesn't work
+ if [ "$sz" -gt "$ez" ]
+ then
+ v.db.update map=ord1_pl_3d_cat column=stats_min value=$ez where="cat=$i"
+ else
+ v.db.update map=ord1_pl_3d_cat column=stats_min value=$sz where="cat=$i"
+ fi
+ min=`echo "select stats_min from ord1_pl_3d_cat where cat=$i" | db.select | sed 1d`
if [ `echo "$ez - $min" | bc -l | cut -f1 -d'.'` -lt "$lt" ] || [ `echo "$sz - $min" | bc -l | cut -f1 -d'.'` -lt "$lt" ]
then
if [ "$ez" -gt "$sz" ]
@@ -177,7 +182,6 @@
fi
done
paste ordcol tmp > textout\_$a
- #read
if [ "$a" -gt 1 ]
then
@@ -193,111 +197,143 @@
mv textout\_$maxord textout
echo "plotting textout..."
cat textout
+ cp textout /tmp/$result/drop_$j.csv
+done
+echo "
+cat('running R\n')
+workdir=\"/tmp/$result/\"
- echo "
- imported=read.csv('textout', sep = '\t', dec='.', header = F)
- dframe=data.frame(imported)
- str(dframe)
- nrow_maxord=nrow(dframe[dframe\$V1==max(dframe\$V1),])
- maxord=max(dframe\$V1)
- ifelse(nrow_maxord==1, (dframe=na.omit(subset(dframe[dframe\$V1 < maxord,]))), (dframe))
- cat('plotted dframe \n')
- nord=c(1:max(dframe\$V1))
- #search for negative values of drop, replacing them with 0.1 values
- dframe\$V2[(dframe\$V2 < 0)]<-0.1
- tmp<-dframe
- #search for zero values of drop, replacing them with 0.1 values
- tmp\$V2[dframe\$V2==0]<-0.1
- dframe<-tmp
- delta=c()
- for(i in nord)
- {
- offset=(qt(0.025,nrow(dframe[dframe\$V1==i,])-1,lower.tail=FALSE)*(sd(log(dframe\$V2[dframe\$V1==i])))/sqrt(nrow(dframe[dframe\$V1==i,])))/(mean (log(dframe\$V2[dframe\$V1==i])))
- delta=c(delta,offset)
+nperm = 999
+trim = 0.25/2
- }
- delta=delta[delta<0.5]
- delta<-data.frame(na.omit(delta))
- dframe<-data.frame(na.omit(dframe[dframe\$V1 <= (nrow(delta)),]))
- print(dframe)
- cat('plotted input dataframe \n')
- linear_regr=summary(lm(dframe\$V2~dframe\$V1))
- print(linear_regr)
- cat('plotted linear regression output \n')
- t=linear_regr\$coefficients[2,3]
- Pr=linear_regr\$coefficients[2,4]
- Radj=linear_regr\$adj.r.squared
- library(agricolae)
- wer_t=waerden.test(dframe\$V2,dframe\$V1,group=F)
- print(wer_t)
- cat('plotted Van der Waerden test output \n')
- sink(file=\"tmpfile\")
- waerden.test(dframe\$V2,dframe\$V1,group=F)
- sink()
- Pval=try(system(\"cat tmpfile | grep Pvalue | cut -f2 -d' '\", intern=TRUE))
- system(\"rm tmpfile\")
- Pval<-as.numeric(Pval)
- out_row<-as.numeric(c(\"$j\",t,Pr,Radj,Pval))
- cat('plotting values: threshold, t, Pr, Radj, Pval \n')
- print(out_row)
- write(out_row,file='results\_$j',ncolumns=5,sep='\t')
- " > R_temp
+#############################################################################
- echo 'source ("R_temp")' | R --vanilla --slave
+setwd(workdir)
+Bname = tolower(strsplit(workdir, \"/tmp/\")[[1]][2])
+bacino = substr(Bname,1,nchar(Bname)-1)
+files = list.files(path = workdir, glob2rx(\"drop_*.csv\"))
+pdfile = paste(bacino, \".pdf\", sep = \"\")
+csvfile = paste(bacino, \".csv\", sep = \"\")
- rm R_temp
+thresholds = as.numeric(sub(\"drop_\", \"\", sub(\".csv\", \"\", files)))
+ord = order(thresholds)
+thresholds = thresholds[ord]
+files = files[ord]
-done
+# t stat with equal variances
+t.statistic <- function(data)
+{
+ m1 = with(data, mean(drop[order==1]))
+ s1 = with(data, sd(drop[order==1]))
+ n1 = with(data, length(drop[order==1]))
+ m2 = with(data, mean(drop[data\$order>1]))
+ s2 = with(data, sd(drop[order>1]))
+ n2 = with(data, length(drop[order>1]))
+ t = (m1-m2)/( sqrt( ((n1-1)*s1^2+(n2-1)*s2^2)/(n1+n2-2) * (1/n1 + 1/n2)) )
+ return(t)
+}
-rm $result
-rm thrVSmaxord
+# difference trimmed mean statistic
+ADTDTM.statistic <- function(data)
+{
+ m1 = with(data, mean(drop[data\$order==1], trim = trim))
+ m2 = with(data, mean(drop[data\$order>1], trim = trim))
+ t = (m1-m2)
+ return(t)
+}
-echo "threshold t Pr Radj Pvalue">$result
-echo "threshold maxord">thrVSmaxord
-for k in $thresholds
-do
- cat results_$k >> $result
- rm results_$k
+# difftrmean.statistic <- function(data)
+# { # difference on median
+# m1 = with(data, median(drop[data\$order==1]))
+# m2 = with(data, median(drop[data\$order>1]))
+# t = (m1-m2)
+# return(t)
+# }
- cat thrVSmaxord_$k >> thrVSmaxord
- rm thrVSmaxord_$k
-done
+tab = matrix(NA, length(files), 10)
+rownames(tab) = thresholds
+colnames(tab) = c(\"n1\", \"n2\", \"Mean 1\", \"Mean >1\", \"diff\", \"sd 1\", \"sd >1\", \"TrMean 1\", \"Tr Mean >1\", \"diff\")
-echo "plotting results..."
-cat $result
+test_pvalue = matrix(NA, length(files), 3)
+colnames(test_pvalue) = c(\"Tarboton t-test\", \"t-test (adjusted)\", \"Permutation test (adjusted)\")
+rownames(test_pvalue) = thresholds
-echo "
-thrVSmaxord=read.csv(\"thrVSmaxord\", sep = '\t', dec='.', header = F)
-names(thrVSmaxord)[1]=\"Threshold\"
-names(thrVSmaxord)[2]=\"Maxord\"
-xgraph=read.csv(\"$result\", sep = '\t', dec='.', header = F)
-names(xgraph)[1]=\"Threshold\"
-names(xgraph)[2]=\"t_statistic\"
-names(xgraph)[3]=\"Pr\"
-names(xgraph)[4]=\"R_squared_adj\"
-names(xgraph)[5]=\"Pvalue\"
+for(i in 1:length(files))
+{
+ data = read.table(files[i], header = FALSE, sep = \"\t\")
+ names(data) = c(\"order\", \"drop\")
+
+
+# with(data,
+# { ord = factor(ifelse(order == 1, 1, 2), labels = c(\"1\", \">1\"))
+# boxplot(drop ~ ord)
+# points(1:nlevels(ord), as.vector(by(drop, ord, mean, trim = trim)), col = 2, pch = 3)
+# })
+#
+# with(data, c(mean(drop[data\$order==1]), mean(drop[data\$order>1])))
+# with(data, c(mean(drop[data\$order==1], trim = trim), mean(drop[data\$order>1], trim = trim)))
+# with(data, c(median(drop[data\$order==1]), median(drop[data\$order>1])))
+
+ tab[i,1] = sum(data\$order==1)
+ tab[i,2] = sum(data\$order>1)
+ tab[i,3] = with(data, mean(drop[data\$order==1]))
+ tab[i,4] = with(data, mean(drop[data\$order>1]))
+ tab[i,5] = tab[i,3] - tab[i,4]
+ tab[i,6] = with(data, sd(drop[data\$order==1]))
+ tab[i,7] = with(data, sd(drop[data\$order>1]))
+ tab[i,8] = with(data, mean(drop[data\$order==1], trim = trim))
+ tab[i,9] = with(data, mean(drop[data\$order>1], trim = trim))
+ tab[i,10] = tab[i,8] - tab[i,9]
-pdf('waerden_test.pdf')
-print(matplot(xgraph\$Threshold, xgraph[, c(\"Pvalue\")], type=\"l\", col=\"red\", lwd=3, lty=1, ylab=\"Pvalue\", pch=1))
-dev.off()
+ t_test = with(data, t.test(drop[order==1], drop[order > 1],
+ var.equal = TRUE, alternative = \"two.sided\"))
+ test_pvalue[i,1:2] = t_test\$p.value
+
+
+ dStat = ADTDTM.statistic(data)
+ permStat = rep(NA, nperm)
+ for(j in 1:nperm)
+ { permData = data
+ perm = sample.int(nrow(data))
+ permData\$order = data\$order[perm]
+ permStat[j] = ADTDTM.statistic(permData)
+ }
+
+ # hist(permStat); abline(v=dStat, col = 2)
+
+ test_pvalue[i,3] = (sum(abs(permStat) > abs(dStat))+1)/(nperm+1)
+}
-pdf('linear_regression.pdf')
-print(matplot(xgraph\$Threshold, xgraph[, c(\"Pr\")], type=\"l\", col=\"green\", lwd=3, lty=1, ylab=\"Pr\", pch=1))
-dev.off()
+# adjust p-value using Benjamini-Hochberg method to control FDR
+test_pvalue[,2] = p.adjust(test_pvalue[,2], method = \"BH\")
+test_pvalue[,3] = p.adjust(test_pvalue[,3], method = \"BH\")
-pdf('all_tests.pdf')
-print(matplot(xgraph\$Threshold, xgraph[, c(\"Pr\",\"Pvalue\")], type=\"b\", lwd=3, lty=1, ylab=\"(1)Pr, (2)Pvalue\"))
-dev.off()
+# Save results
+write.csv(file = csvfile, cbind(tab,test_pvalue))
-pdf('thrVSmaxord.pdf')
-print(matplot(thrVSmaxord\$Threshold, thrVSmaxord[, c(\"Maxord\")], type=\"l\", col=\"blue\", lwd=3, lty=1, ylab=\"Maxord\", pch=1))
+# Read save results
+# out = read.csv(file = csvfile, check.names = FALSE)
+# tab = out[,1:11]
+# test_pvalue = out[,12:14]
+# colnames(test_pvalue)[3] = \"APTDTM\"
+
+pdf(file = pdfile, width = 8, height = 6)
+par(mar = c(4,4,1,1), mgp = c(2.2, 0.5, 0), tcl = -0.3)
+cols = c(\"grey30\", \"black\", \"cornflowerblue\")
+pchs = c(2, 17, 19)
+par(mar = c(4,4,1,1))
+matplot(thresholds, test_pvalue, type = \"b\", log = \"x\",
+ xlab = \"Threshold\", ylab = \"p-value\", lty = 1, col = cols, pch = pchs, xaxt = \"n\", yaxt = \"n\")
+axis(1, at = thresholds, cex.axis = 0.7, las = 3)
+axis(2, at = sort(c(0.05, 0:10/10)), cex.axis = 0.7)
+abline(h = 0.05, lty = 2)
+legend(\"topleft\", legend = colnames(test_pvalue), col = cols, pch = pchs, lty = 1, inset = 0.05)
dev.off()
-
" > R_temp
echo 'source ("R_temp")' | R --vanilla --slave
-rm R_temp
+rm R_temp one maxord ordcol textout three two tmp
More information about the grass-commit
mailing list