[GRASS-SVN] r61033 - grass-addons/grass7/raster/r.gwr

svn_grass at osgeo.org svn_grass at osgeo.org
Sat Jun 28 11:57:00 PDT 2014


Author: mmetz
Date: 2014-06-28 11:57:00 -0700 (Sat, 28 Jun 2014)
New Revision: 61033

Modified:
   grass-addons/grass7/raster/r.gwr/estimate_bw.c
   grass-addons/grass7/raster/r.gwr/gwr.c
   grass-addons/grass7/raster/r.gwr/local_proto.h
   grass-addons/grass7/raster/r.gwr/main.c
   grass-addons/grass7/raster/r.gwr/r.gwr.html
   grass-addons/grass7/raster/r.gwr/weights.c
Log:
r.gwr: +REFERENCES -BUGS

Modified: grass-addons/grass7/raster/r.gwr/estimate_bw.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/estimate_bw.c	2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/estimate_bw.c	2014-06-28 18:57:00 UTC (rev 61033)
@@ -57,12 +57,13 @@
     int r, c;
     struct rb *xbuf1, *xbuf2, *xbuf3, ybuf1, ybuf2, ybuf3;
     int prevbw, nextbw, newbw;
+    int bwmin, bwmax;
     int n1, n2, n3;
     double f11, f12, f2;
     double ss1, ss2, ss3;
     double **w1, **w2, **w3;
     DCELL *ybuf, yval;
-    int nr, nc, nrt, nct, count;
+    int nr, nc, nrt, nct, count, gwrfailed;
     
     ybuf = Rast_allocate_d_buf();
     
@@ -94,6 +95,9 @@
 	bw = 2;
     }
 
+    bwmin = 2;
+    bwmax = sqrt((double) nrows * nrows + (double) ncols * ncols);
+
     prevbw = bw - 1;
     nextbw = bw + 1;
 
@@ -105,6 +109,9 @@
 
     while (1) {
 
+	if (bw > bwmax)
+	    break;
+
 	G_message(_("Testing bandwidth %d"), bw);
 
 	for (i = 0; i < ninx; i++) {
@@ -147,6 +154,7 @@
 	nrt = nr;
 	nct = nc;
 	count = 0;
+	gwrfailed = 0;
 
 	for (r = 0; r < nrows; r++) {
 	    G_percent(r, nrows, 4);
@@ -178,13 +186,37 @@
 			ss2 += (est[0] - yval) * (est[0] - yval);
 			n2++;
 		    }
+		    else {
+			/* enforce increase */
+			bwmin = bw + 2;
+			gwrfailed = 1;
+			break;
+		    }
 		    if (gwr(xbuf3, ninx, &ybuf3, c, nextbw, w3, est, NULL)) {
 			ss3 += (est[0] - yval) * (est[0] - yval);
 			n3++;
 		    }
+		    else {
+			/* enforce increase */
+			bwmin = nextbw + 2;
+			gwrfailed = 1;
+			break;
+		    }
 		}
 		nct--;
 	    }
+	    if (gwrfailed) {
+		G_message(_("Unable to determine coefficients for bandwidth %d, "
+		            "continuing with new minimum of %d."), bw, bwmin);
+
+		bw = bwmin;
+		ss1 = 3;
+		ss2 = 2;
+		ss3 = 1;
+		n1 = n2 = n3 = 1;
+
+		break;
+	    }
 	}
 	G_percent(nrows, nrows, 4);
 	
@@ -202,10 +234,9 @@
 	if (n1 == 0 || n2 == 0 || n3 == 0)
 	    G_fatal_error(_("Unable to calculate sum of squares"));
 
-	/* correcting factor to favour larger bandwiths: bw * 2 + 1 */
-	ss1 /= n1 * prevbw * 2 + 1;
-	ss2 /= n2 * bw * 2 + 1;
-	ss3 /= n3 * nextbw * 2 + 1;
+	ss1 /= n1;
+	ss2 /= n2;
+	ss3 /= n3;
 
 	G_debug(1, "ss1: %g", ss1);
 	G_debug(1, "ss2: %g", ss2);
@@ -232,61 +263,85 @@
 	G_free(w2);
 	G_free(w3);
 
+/* deactivate to get sum of squares for (newbw = bw; newbw > 1; newbw--) */
+#if 1
+	if (gwrfailed) {
+	    bw = bwmin;
+	    prevbw = bw - 1;
+	    nextbw = bw + 1;
+	    continue;
+	}
+
+	newbw = bw;
+
+	/* local minimum */
 	if (ss2 < ss1 && ss2 < ss3)
 	    break;
 
-	if (ss1 < ss3) {
-	    /* decrease bandwidth */
-	    if (f2 > 0) {
-		/* Newton's method to find the extremum */
-		newbw = bw - (f11 + f12) / (2. * f2) + 0.5;
-	    }
-	    else {
-		newbw = bw - bw / 2;
-	    }
+	/* local maximum */
+	if (ss2 > ss1 && ss2 > ss3) {
+	    G_warning(_("Detected local maximum"));
+	    if (bw > bwmin)
+		newbw = bw - 1;
+	    else
+		newbw = bw + 1;
 	}
-	else if (ss1 > ss3) {
-	    /* increase bandwidth */
+	else {
+	    /* determine new bandwidth */
 	    if (f2 > 0) {
 		/* Newton's method to find the extremum */
 		newbw = bw - (f11 + f12) / (2. * f2) + 0.5;
 	    }
 	    else {
-		newbw = bw + bw / 2;
+		if (ss1 < ss3) {
+		    /* decrease bandwidth */
+		    newbw = bw - bw / 2;
+		}
+		else if (ss1 > ss3) {
+		    /* increase bandwidth */
+		    newbw = bw + bw / 2;
+		}
+		else {  /* ss1 == ss3 && ss1 == ss2 */
+		    newbw = bw + 1;
+		}
 	    }
 	}
-	else {
-	    /* ss1 == ss3 && ss2 >= ss1|ss3)  */
-	    break;
-	}
 
 	if (ss1 < ss2 && newbw > bw) {
 	    /* moving bw to wrong direction: stop here? */
-	    G_debug(0, "f2: %g", f2);
+	    G_debug(0, "f11: %g, f12: %g, f2: %g", f11, f12, f2);
 	    newbw = bw - 1;
 	}
 	if (ss3 < ss2 && newbw < bw) {
 	    /* moving bw to wrong direction: stop here? */
-	    G_debug(0, "f2: %g", f2);
+	    G_debug(0, "f11: %g, f12: %g, f2: %g", f11, f12, f2);
 	    newbw = bw + 1;
 	}
 	
 	if (newbw == bw)
 	    break;
+#else
+	newbw = bw - 1;
+#endif
 	
-	if (newbw < 2) {
+	if (newbw < bwmin) {
 	    G_debug(1, "Bandwidth is getting too small: %d", newbw);
-	    if (bw > 2)
-		bw--;
+	    if (bw > bwmin)
+		newbw = bwmin;
 	    else
 		break;
 	}
-	else
-	    bw = newbw;
 
+	bw = newbw;
+
 	prevbw = bw - 1;
 	nextbw = bw + 1;
     }
 
+    if (bw < bwmin)
+	bw = bwmin;
+    if (bw > bwmax)
+	bw = bwmax;
+
     return bw;
 }

Modified: grass-addons/grass7/raster/r.gwr/gwr.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/gwr.c	2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/gwr.c	2014-06-28 18:57:00 UTC (rev 61033)
@@ -232,7 +232,7 @@
 		fprintf(stdout, "b%d=0.0\n", i);
 	    }
 	    */
-	    G_debug(0, "Try a larger bandwidth");
+	    G_debug(1, "Try a larger bandwidth");
 	    count = 0;
 	}
     }

Modified: grass-addons/grass7/raster/r.gwr/local_proto.h
===================================================================
--- grass-addons/grass7/raster/r.gwr/local_proto.h	2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/local_proto.h	2014-06-28 18:57:00 UTC (rev 61033)
@@ -1,10 +1,5 @@
 #include "bufs.h"
 
-double **gauss(int);
-double **epanechnikov(int);
-double **quartic(int);
-double **tricubic(int);
-
 extern double ** (*w_fn) (int);
 
 void set_wfn(char *name, int vfu);

Modified: grass-addons/grass7/raster/r.gwr/main.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/main.c	2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/main.c	2014-06-28 18:57:00 UTC (rev 61033)
@@ -42,11 +42,16 @@
     double Rsq, Rsqadj, SE, F, t, AIC, AICc, BIC;
     DCELL *mapx_val, mapy_val, *mapy_buf, *mapres_buf, *mapest_buf;
     struct rb *xbuf, ybuf;
+    struct outputb {
+	int fd;
+	char name[GNAME_MAX];
+	DCELL *buf;
+    } *outb, *outbp;
     double **weights;
     int bw;
     char *name;
     struct Option *input_mapx, *input_mapy,
-                  *output_res, *output_est, *output_opt,
+                  *output_res, *output_est, *output_b, *output_opt,
 		  *bw_opt, *kernel_opt, *vf_opt;
     struct Flag *shell_style, *estimate;
     struct Cell_head region;
@@ -63,11 +68,11 @@
     /* Define the different options */
     input_mapx = G_define_standard_option(G_OPT_R_INPUTS);
     input_mapx->key = "mapx";
-    input_mapx->description = (_("Map for x coefficient"));
+    input_mapx->description = (_("Map(s) with X variables"));
 
     input_mapy = G_define_standard_option(G_OPT_R_INPUT);
     input_mapy->key = "mapy";
-    input_mapy->description = (_("Map for y coefficient"));
+    input_mapy->description = (_("Map with Y variable"));
 
     output_res = G_define_standard_option(G_OPT_R_OUTPUT);
     output_res->key = "residuals";
@@ -79,6 +84,12 @@
     output_est->required = NO;
     output_est->description = (_("Map to store estimates"));
 
+    output_b = G_define_option();
+    output_b->key = "coefficients";
+    output_b->type = TYPE_STRING;
+    output_b->required = NO;
+    output_b->description = (_("Prefix for maps to store coefficients"));
+
     output_opt = G_define_standard_option(G_OPT_F_OUTPUT);
     output_opt->key = "output";
     output_opt->required = NO;
@@ -88,7 +99,7 @@
     kernel_opt = G_define_option();
     kernel_opt->key = "kernel";
     kernel_opt->type = TYPE_STRING;
-    kernel_opt->options = "gauss,epanechnikov,quartic,tricubic";
+    kernel_opt->options = "gauss,epanechnikov,bisquare,tricubic";
     kernel_opt->answer = "gauss";
     kernel_opt->required = NO;
     kernel_opt->description =
@@ -250,6 +261,7 @@
     Bsumsq = G_malloc((n_predictors + 1) * sizeof(double));
     Bmean = G_malloc((n_predictors + 1) * sizeof(double));
     
+    /* localized coefficients */
     for (i = 0; i <= n_predictors; i++) {
 	Bmin[i] = 1.0 / 0.0;	/* inf */
 	Bmax[i] = -1.0 / 0.0;	/* -inf */
@@ -257,6 +269,19 @@
     }
     bcount = 0;
 
+    outb = outbp = NULL;
+    if (output_b->answer) {
+	outb = G_malloc((n_predictors + 1) * sizeof(struct outputb));
+
+	for (i = 0; i <= n_predictors; i++) {
+	    outbp = &outb[i];
+	    sprintf(outbp->name, "%s.%d", output_b->answer, i);
+	    
+	    outbp->fd = Rast_open_new(outbp->name, DCELL_TYPE);
+	    outbp->buf = Rast_allocate_d_buf();
+	}
+    }
+
     for (r = 0; r < rows; r++) {
 	G_percent(r, rows, 2);
 
@@ -271,6 +296,13 @@
 	if (mapest_buf)
 	    Rast_set_d_null_value(mapest_buf, cols);
 
+	if (outb) {
+	    for (i = 0; i <= n_predictors; i++) {
+		outbp = &outb[i];
+		Rast_set_d_null_value(outbp->buf, cols);
+	    }
+	}
+
 	for (c = 0; c < cols; c++) {
 	    int isnull = 0;
 
@@ -285,17 +317,24 @@
 		continue;
 
 	    if (!gwr(xbuf, n_predictors, &ybuf, c, bw, weights, yest, &B)) {
+		G_warning(_("Unable to determine coefficients. Consider increasing the bandwidth."));
 		continue;
 	    }
 	    
 	    /* coefficient stats */
-	    for (k = 0; k <= n_predictors; k++) {
-		if (Bmin[k] > B[k])
-		    Bmin[k] = B[k];
-		if (Bmax[k] < B[k])
-		    Bmax[k] = B[k];
-		Bsum[k] += B[k];
-		Bsumsq[k] += B[k] * B[k];
+	    for (i = 0; i <= n_predictors; i++) {
+		if (Bmin[i] > B[i])
+		    Bmin[i] = B[i];
+		if (Bmax[i] < B[i])
+		    Bmax[i] = B[i];
+		Bsum[i] += B[i];
+		Bsumsq[i] += B[i] * B[i];
+
+		/* output raster for coefficients */
+		if (outb) {
+		    outbp = &outb[i];
+		    outbp->buf[c] = B[i];
+		}
 	    }
 	    bcount++;
 
@@ -330,6 +369,12 @@
 	    Rast_put_d_row(mapres_fd, mapres_buf);
 	if (mapest_buf)
 	    Rast_put_d_row(mapest_fd, mapest_buf);
+	if (outb) {
+	    for (i = 0; i <= n_predictors; i++) {
+		outbp = &outb[i];
+		Rast_put_d_row(outbp->fd, outbp->buf);
+	    }
+	}
     }
     G_percent(rows, rows, 2);
 
@@ -381,7 +426,7 @@
 
 	fprintf(stdout, "\npredictor%d=%s\n", i + 1, input_mapx->answers[i]);
 	Bmean[i + 1] = Bsum[i + 1] / bcount;
-	fprintf(stdout, "bmean%d=%g\n", i, Bmean[i + 1]);
+	fprintf(stdout, "bmean%d=%g\n", i + 1, Bmean[i + 1]);
 	Bstddev = sqrt(Bsumsq[i + 1] / bcount - (Bmean[i + 1] * Bmean[i + 1]));
 	fprintf(stdout, "bstddev%d=%g\n", i + 1, Bstddev);
 	fprintf(stdout, "bmin%d=%g\n", i + 1, Bmin[i + 1]);
@@ -475,5 +520,19 @@
 	Rast_write_history(output_est->answer, &history);
     }
 
+    if (outb) {
+	for (i = 0; i <= n_predictors; i++) {
+	    struct History history;
+
+	    outbp = &outb[i];
+	    Rast_close(outbp->fd);
+	    G_free(outbp->buf);
+
+	    Rast_short_history(outbp->name, "raster", &history);
+	    Rast_command_history(&history);
+	    Rast_write_history(outbp->name, &history);
+	}
+    }
+
     exit(EXIT_SUCCESS);
 }

Modified: grass-addons/grass7/raster/r.gwr/r.gwr.html
===================================================================
--- grass-addons/grass7/raster/r.gwr/r.gwr.html	2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/r.gwr.html	2014-06-28 18:57:00 UTC (rev 61033)
@@ -22,18 +22,24 @@
 </pre></div>
 
 <p>
-The b coefficients are localized, i.e. determined for each cell 
-individually.
+The β coefficients are localized, i.e. determined for each cell 
+individually. These β coefficients are the most important output 
+of <em>r.gwr</em>. Spatial patterns and localized outliers in these 
+coefficients can reveal details of the relation of Y to X. Outliers in 
+the β coefficients can also be caused by a small bandwidth and can 
+be removed by increasing the bandwidth.
 
 <p>
 Geographically weighted regressions should be used as a diagnostic tool 
 and not as an interpolation method. If a geographically weighted 
-regression provides a higher R<sup>2</sup> than the corresponding 
-global regression, a crucial predictor is missing in the model. If that 
+regression provides a higher R squared than the corresponding global 
+regression, then a crucial predictor is missing in the model. If that 
 missing predictor can not be estimated or is supposed to behave 
 randomly, a geographically weighted regression might be used for 
 interpolation, but the result, in particular the variation of the 
-β coefficients needs to be judged according to prior assumptions.
+β coefficients needs to be judged according to prior assumptions. 
+See also the manual and the examples of the R package 
+<a href="http://cran.rstudio.com/web/packages/spgwr/index.html">spgwr</a>.
 
 <p>
 <em>r.gwr</em> is designed for large datasets that can not be processed 
@@ -60,16 +66,55 @@
 R successively includes one variable after another in the order
 specified by the formula and at each step calculates the F value
 expressing the gain by including the current variable in addition to the
-previous variables, <em>r.regression.multi</em> calculates the F-value
+previous variables, <em>r.gwr</em> calculates the F-value
 expressing the gain by including the current variable in addition to all
 other variables, not only the previous variables.
 
+<h4>Bandwidth</h4>
+The bandwidth is the crucial parameter for geographically weighed 
+regressions. A too small bandwidth will essentially use the weighed 
+average, any predictors are mostly ignored. A too large bandwidth will 
+produce results similar to a global regression, and spatial 
+non-stationarity can not be explored.
 
-<h2>BUGS</h2>
+<h4>Kernel functions</h4>
+The kernel function has little influence on the result, much more important is the 
+bandwidth. Available kernel funtions to calculate weights are
 
-The estimated bandwidth is too small. A correcting factor has been 
-introduced, but this is just a workaround.
+<dl>
+<dt><b>Epanechnikov</b></dt>
+<dd>w = 1 - d / bw</dd>
+<dt><b>Bisquare (Quartic)</b></dt>
+<dd>w = (1 - (d / bw)<sup>2</sup>)<sup>2</sup></dd>
+<dt><b>Tricubic</b></dt>
+<dd>w = (1 - (d / bw)<sup>3</sup>)<sup>3</sup></dd>
+<dt><b>Gaussian</b></dt>
+<dd>w = exp(-0.5 * (d / bw)<sup>2</sup>)</dd>
+</dl>
 
+with<br>
+w = weight for current cell<br>
+d = distance to the current cell<br>
+bw = bandwidth
+
+<h2>REFERENCES</h2>
+
+Brunsdon, C., Fotheringham, A.S., and Charlton, M.E., 1996, 
+Geographically Weighted Regression: A Method for Exploring Spatial 
+Nonstationarity, Geographical Analysis, 28(4), 281- 298<br>
+Fotheringham, A.S., Brunsdon, C., and Charlton, M.E., 2002, 
+Geographically Weighted Regression: The Analysis of Spatially Varying 
+Relationships, Chichester: Wiley.<br>
+
+<h2>SEE ALSO</h2>
+
+<a href="http://geoinformatics.wp.st-andrews.ac.uk/gwr/">http://geoinformatics.wp.st-andrews.ac.uk/gwr/</a>
+<br>
+<a href="http://gwr.nuim.ie/">http://gwr.nuim.ie/</a>
+<br>
+R package 
+<a href="http://cran.rstudio.com/web/packages/spgwr/index.html">spgwr</a>
+
 <h2>AUTHOR</h2>
 
 Markus Metz

Modified: grass-addons/grass7/raster/r.gwr/weights.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/weights.c	2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/weights.c	2014-06-28 18:57:00 UTC (rev 61033)
@@ -8,7 +8,7 @@
  * each cell within bw (distance from center <= bw)
  * gets a weight > 0 */
 
-static double vf = 2;
+static double vf = -0.5;
 
 double ** (*w_fn) (int);
 
@@ -18,7 +18,7 @@
     double bw2, bw2d, d, **w;
 
     w = G_alloc_matrix(bw * 2 + 1, bw * 2 + 1);
-    bw2 = (bw + 1) * (bw + 1);
+    bw2 = (bw + 1);
     bw2d = bw * bw;
     
     for (r = -bw; r <= bw; r++) {
@@ -35,7 +35,7 @@
     return w;
 }
 
-double **quartic(int bw)
+double **bisquare(int bw)
 {
     int r, c;
     double bw2, bw2d, d, **w, t;
@@ -101,25 +101,24 @@
 
 	    if (d <= bw2) {
 		w[r + bw][c + bw] = exp(vf * d / bw2);
-		w[r + bw][c + bw] = 1;
 	    }
 	}
     }
-    
+
     return w;
 }
 
 /* set weighing kernel function and variance factor */
 void set_wfn(char *name, int vfu)
 {
-    vf = vfu / 2.;
+    vf = vfu / -2.;
 
     if (*name == 'g')
 	w_fn = gauss;
     else if (*name == 'e')
 	w_fn = epanechnikov;
-    else if (*name == 'q')
-	w_fn = quartic;
+    else if (*name == 'b')
+	w_fn = bisquare;
     else if (*name == 't')
 	w_fn = tricubic;
     else



More information about the grass-commit mailing list