[GRASS-SVN] r61033 - grass-addons/grass7/raster/r.gwr
svn_grass at osgeo.org
svn_grass at osgeo.org
Sat Jun 28 11:57:00 PDT 2014
Author: mmetz
Date: 2014-06-28 11:57:00 -0700 (Sat, 28 Jun 2014)
New Revision: 61033
Modified:
grass-addons/grass7/raster/r.gwr/estimate_bw.c
grass-addons/grass7/raster/r.gwr/gwr.c
grass-addons/grass7/raster/r.gwr/local_proto.h
grass-addons/grass7/raster/r.gwr/main.c
grass-addons/grass7/raster/r.gwr/r.gwr.html
grass-addons/grass7/raster/r.gwr/weights.c
Log:
r.gwr: +REFERENCES -BUGS
Modified: grass-addons/grass7/raster/r.gwr/estimate_bw.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/estimate_bw.c 2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/estimate_bw.c 2014-06-28 18:57:00 UTC (rev 61033)
@@ -57,12 +57,13 @@
int r, c;
struct rb *xbuf1, *xbuf2, *xbuf3, ybuf1, ybuf2, ybuf3;
int prevbw, nextbw, newbw;
+ int bwmin, bwmax;
int n1, n2, n3;
double f11, f12, f2;
double ss1, ss2, ss3;
double **w1, **w2, **w3;
DCELL *ybuf, yval;
- int nr, nc, nrt, nct, count;
+ int nr, nc, nrt, nct, count, gwrfailed;
ybuf = Rast_allocate_d_buf();
@@ -94,6 +95,9 @@
bw = 2;
}
+ bwmin = 2;
+ bwmax = sqrt((double) nrows * nrows + (double) ncols * ncols);
+
prevbw = bw - 1;
nextbw = bw + 1;
@@ -105,6 +109,9 @@
while (1) {
+ if (bw > bwmax)
+ break;
+
G_message(_("Testing bandwidth %d"), bw);
for (i = 0; i < ninx; i++) {
@@ -147,6 +154,7 @@
nrt = nr;
nct = nc;
count = 0;
+ gwrfailed = 0;
for (r = 0; r < nrows; r++) {
G_percent(r, nrows, 4);
@@ -178,13 +186,37 @@
ss2 += (est[0] - yval) * (est[0] - yval);
n2++;
}
+ else {
+ /* enforce increase */
+ bwmin = bw + 2;
+ gwrfailed = 1;
+ break;
+ }
if (gwr(xbuf3, ninx, &ybuf3, c, nextbw, w3, est, NULL)) {
ss3 += (est[0] - yval) * (est[0] - yval);
n3++;
}
+ else {
+ /* enforce increase */
+ bwmin = nextbw + 2;
+ gwrfailed = 1;
+ break;
+ }
}
nct--;
}
+ if (gwrfailed) {
+ G_message(_("Unable to determine coefficients for bandwidth %d, "
+ "continuing with new minimum of %d."), bw, bwmin);
+
+ bw = bwmin;
+ ss1 = 3;
+ ss2 = 2;
+ ss3 = 1;
+ n1 = n2 = n3 = 1;
+
+ break;
+ }
}
G_percent(nrows, nrows, 4);
@@ -202,10 +234,9 @@
if (n1 == 0 || n2 == 0 || n3 == 0)
G_fatal_error(_("Unable to calculate sum of squares"));
- /* correcting factor to favour larger bandwiths: bw * 2 + 1 */
- ss1 /= n1 * prevbw * 2 + 1;
- ss2 /= n2 * bw * 2 + 1;
- ss3 /= n3 * nextbw * 2 + 1;
+ ss1 /= n1;
+ ss2 /= n2;
+ ss3 /= n3;
G_debug(1, "ss1: %g", ss1);
G_debug(1, "ss2: %g", ss2);
@@ -232,61 +263,85 @@
G_free(w2);
G_free(w3);
+/* deactivate to get sum of squares for (newbw = bw; newbw > 1; newbw--) */
+#if 1
+ if (gwrfailed) {
+ bw = bwmin;
+ prevbw = bw - 1;
+ nextbw = bw + 1;
+ continue;
+ }
+
+ newbw = bw;
+
+ /* local minimum */
if (ss2 < ss1 && ss2 < ss3)
break;
- if (ss1 < ss3) {
- /* decrease bandwidth */
- if (f2 > 0) {
- /* Newton's method to find the extremum */
- newbw = bw - (f11 + f12) / (2. * f2) + 0.5;
- }
- else {
- newbw = bw - bw / 2;
- }
+ /* local maximum */
+ if (ss2 > ss1 && ss2 > ss3) {
+ G_warning(_("Detected local maximum"));
+ if (bw > bwmin)
+ newbw = bw - 1;
+ else
+ newbw = bw + 1;
}
- else if (ss1 > ss3) {
- /* increase bandwidth */
+ else {
+ /* determine new bandwidth */
if (f2 > 0) {
/* Newton's method to find the extremum */
newbw = bw - (f11 + f12) / (2. * f2) + 0.5;
}
else {
- newbw = bw + bw / 2;
+ if (ss1 < ss3) {
+ /* decrease bandwidth */
+ newbw = bw - bw / 2;
+ }
+ else if (ss1 > ss3) {
+ /* increase bandwidth */
+ newbw = bw + bw / 2;
+ }
+ else { /* ss1 == ss3 && ss1 == ss2 */
+ newbw = bw + 1;
+ }
}
}
- else {
- /* ss1 == ss3 && ss2 >= ss1|ss3) */
- break;
- }
if (ss1 < ss2 && newbw > bw) {
/* moving bw to wrong direction: stop here? */
- G_debug(0, "f2: %g", f2);
+ G_debug(0, "f11: %g, f12: %g, f2: %g", f11, f12, f2);
newbw = bw - 1;
}
if (ss3 < ss2 && newbw < bw) {
/* moving bw to wrong direction: stop here? */
- G_debug(0, "f2: %g", f2);
+ G_debug(0, "f11: %g, f12: %g, f2: %g", f11, f12, f2);
newbw = bw + 1;
}
if (newbw == bw)
break;
+#else
+ newbw = bw - 1;
+#endif
- if (newbw < 2) {
+ if (newbw < bwmin) {
G_debug(1, "Bandwidth is getting too small: %d", newbw);
- if (bw > 2)
- bw--;
+ if (bw > bwmin)
+ newbw = bwmin;
else
break;
}
- else
- bw = newbw;
+ bw = newbw;
+
prevbw = bw - 1;
nextbw = bw + 1;
}
+ if (bw < bwmin)
+ bw = bwmin;
+ if (bw > bwmax)
+ bw = bwmax;
+
return bw;
}
Modified: grass-addons/grass7/raster/r.gwr/gwr.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/gwr.c 2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/gwr.c 2014-06-28 18:57:00 UTC (rev 61033)
@@ -232,7 +232,7 @@
fprintf(stdout, "b%d=0.0\n", i);
}
*/
- G_debug(0, "Try a larger bandwidth");
+ G_debug(1, "Try a larger bandwidth");
count = 0;
}
}
Modified: grass-addons/grass7/raster/r.gwr/local_proto.h
===================================================================
--- grass-addons/grass7/raster/r.gwr/local_proto.h 2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/local_proto.h 2014-06-28 18:57:00 UTC (rev 61033)
@@ -1,10 +1,5 @@
#include "bufs.h"
-double **gauss(int);
-double **epanechnikov(int);
-double **quartic(int);
-double **tricubic(int);
-
extern double ** (*w_fn) (int);
void set_wfn(char *name, int vfu);
Modified: grass-addons/grass7/raster/r.gwr/main.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/main.c 2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/main.c 2014-06-28 18:57:00 UTC (rev 61033)
@@ -42,11 +42,16 @@
double Rsq, Rsqadj, SE, F, t, AIC, AICc, BIC;
DCELL *mapx_val, mapy_val, *mapy_buf, *mapres_buf, *mapest_buf;
struct rb *xbuf, ybuf;
+ struct outputb {
+ int fd;
+ char name[GNAME_MAX];
+ DCELL *buf;
+ } *outb, *outbp;
double **weights;
int bw;
char *name;
struct Option *input_mapx, *input_mapy,
- *output_res, *output_est, *output_opt,
+ *output_res, *output_est, *output_b, *output_opt,
*bw_opt, *kernel_opt, *vf_opt;
struct Flag *shell_style, *estimate;
struct Cell_head region;
@@ -63,11 +68,11 @@
/* Define the different options */
input_mapx = G_define_standard_option(G_OPT_R_INPUTS);
input_mapx->key = "mapx";
- input_mapx->description = (_("Map for x coefficient"));
+ input_mapx->description = (_("Map(s) with X variables"));
input_mapy = G_define_standard_option(G_OPT_R_INPUT);
input_mapy->key = "mapy";
- input_mapy->description = (_("Map for y coefficient"));
+ input_mapy->description = (_("Map with Y variable"));
output_res = G_define_standard_option(G_OPT_R_OUTPUT);
output_res->key = "residuals";
@@ -79,6 +84,12 @@
output_est->required = NO;
output_est->description = (_("Map to store estimates"));
+ output_b = G_define_option();
+ output_b->key = "coefficients";
+ output_b->type = TYPE_STRING;
+ output_b->required = NO;
+ output_b->description = (_("Prefix for maps to store coefficients"));
+
output_opt = G_define_standard_option(G_OPT_F_OUTPUT);
output_opt->key = "output";
output_opt->required = NO;
@@ -88,7 +99,7 @@
kernel_opt = G_define_option();
kernel_opt->key = "kernel";
kernel_opt->type = TYPE_STRING;
- kernel_opt->options = "gauss,epanechnikov,quartic,tricubic";
+ kernel_opt->options = "gauss,epanechnikov,bisquare,tricubic";
kernel_opt->answer = "gauss";
kernel_opt->required = NO;
kernel_opt->description =
@@ -250,6 +261,7 @@
Bsumsq = G_malloc((n_predictors + 1) * sizeof(double));
Bmean = G_malloc((n_predictors + 1) * sizeof(double));
+ /* localized coefficients */
for (i = 0; i <= n_predictors; i++) {
Bmin[i] = 1.0 / 0.0; /* inf */
Bmax[i] = -1.0 / 0.0; /* -inf */
@@ -257,6 +269,19 @@
}
bcount = 0;
+ outb = outbp = NULL;
+ if (output_b->answer) {
+ outb = G_malloc((n_predictors + 1) * sizeof(struct outputb));
+
+ for (i = 0; i <= n_predictors; i++) {
+ outbp = &outb[i];
+ sprintf(outbp->name, "%s.%d", output_b->answer, i);
+
+ outbp->fd = Rast_open_new(outbp->name, DCELL_TYPE);
+ outbp->buf = Rast_allocate_d_buf();
+ }
+ }
+
for (r = 0; r < rows; r++) {
G_percent(r, rows, 2);
@@ -271,6 +296,13 @@
if (mapest_buf)
Rast_set_d_null_value(mapest_buf, cols);
+ if (outb) {
+ for (i = 0; i <= n_predictors; i++) {
+ outbp = &outb[i];
+ Rast_set_d_null_value(outbp->buf, cols);
+ }
+ }
+
for (c = 0; c < cols; c++) {
int isnull = 0;
@@ -285,17 +317,24 @@
continue;
if (!gwr(xbuf, n_predictors, &ybuf, c, bw, weights, yest, &B)) {
+ G_warning(_("Unable to determine coefficients. Consider increasing the bandwidth."));
continue;
}
/* coefficient stats */
- for (k = 0; k <= n_predictors; k++) {
- if (Bmin[k] > B[k])
- Bmin[k] = B[k];
- if (Bmax[k] < B[k])
- Bmax[k] = B[k];
- Bsum[k] += B[k];
- Bsumsq[k] += B[k] * B[k];
+ for (i = 0; i <= n_predictors; i++) {
+ if (Bmin[i] > B[i])
+ Bmin[i] = B[i];
+ if (Bmax[i] < B[i])
+ Bmax[i] = B[i];
+ Bsum[i] += B[i];
+ Bsumsq[i] += B[i] * B[i];
+
+ /* output raster for coefficients */
+ if (outb) {
+ outbp = &outb[i];
+ outbp->buf[c] = B[i];
+ }
}
bcount++;
@@ -330,6 +369,12 @@
Rast_put_d_row(mapres_fd, mapres_buf);
if (mapest_buf)
Rast_put_d_row(mapest_fd, mapest_buf);
+ if (outb) {
+ for (i = 0; i <= n_predictors; i++) {
+ outbp = &outb[i];
+ Rast_put_d_row(outbp->fd, outbp->buf);
+ }
+ }
}
G_percent(rows, rows, 2);
@@ -381,7 +426,7 @@
fprintf(stdout, "\npredictor%d=%s\n", i + 1, input_mapx->answers[i]);
Bmean[i + 1] = Bsum[i + 1] / bcount;
- fprintf(stdout, "bmean%d=%g\n", i, Bmean[i + 1]);
+ fprintf(stdout, "bmean%d=%g\n", i + 1, Bmean[i + 1]);
Bstddev = sqrt(Bsumsq[i + 1] / bcount - (Bmean[i + 1] * Bmean[i + 1]));
fprintf(stdout, "bstddev%d=%g\n", i + 1, Bstddev);
fprintf(stdout, "bmin%d=%g\n", i + 1, Bmin[i + 1]);
@@ -475,5 +520,19 @@
Rast_write_history(output_est->answer, &history);
}
+ if (outb) {
+ for (i = 0; i <= n_predictors; i++) {
+ struct History history;
+
+ outbp = &outb[i];
+ Rast_close(outbp->fd);
+ G_free(outbp->buf);
+
+ Rast_short_history(outbp->name, "raster", &history);
+ Rast_command_history(&history);
+ Rast_write_history(outbp->name, &history);
+ }
+ }
+
exit(EXIT_SUCCESS);
}
Modified: grass-addons/grass7/raster/r.gwr/r.gwr.html
===================================================================
--- grass-addons/grass7/raster/r.gwr/r.gwr.html 2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/r.gwr.html 2014-06-28 18:57:00 UTC (rev 61033)
@@ -22,18 +22,24 @@
</pre></div>
<p>
-The b coefficients are localized, i.e. determined for each cell
-individually.
+The β coefficients are localized, i.e. determined for each cell
+individually. These β coefficients are the most important output
+of <em>r.gwr</em>. Spatial patterns and localized outliers in these
+coefficients can reveal details of the relation of Y to X. Outliers in
+the β coefficients can also be caused by a small bandwidth and can
+be removed by increasing the bandwidth.
<p>
Geographically weighted regressions should be used as a diagnostic tool
and not as an interpolation method. If a geographically weighted
-regression provides a higher R<sup>2</sup> than the corresponding
-global regression, a crucial predictor is missing in the model. If that
+regression provides a higher R squared than the corresponding global
+regression, then a crucial predictor is missing in the model. If that
missing predictor can not be estimated or is supposed to behave
randomly, a geographically weighted regression might be used for
interpolation, but the result, in particular the variation of the
-β coefficients needs to be judged according to prior assumptions.
+β coefficients needs to be judged according to prior assumptions.
+See also the manual and the examples of the R package
+<a href="http://cran.rstudio.com/web/packages/spgwr/index.html">spgwr</a>.
<p>
<em>r.gwr</em> is designed for large datasets that can not be processed
@@ -60,16 +66,55 @@
R successively includes one variable after another in the order
specified by the formula and at each step calculates the F value
expressing the gain by including the current variable in addition to the
-previous variables, <em>r.regression.multi</em> calculates the F-value
+previous variables, <em>r.gwr</em> calculates the F-value
expressing the gain by including the current variable in addition to all
other variables, not only the previous variables.
+<h4>Bandwidth</h4>
+The bandwidth is the crucial parameter for geographically weighed
+regressions. A too small bandwidth will essentially use the weighed
+average, any predictors are mostly ignored. A too large bandwidth will
+produce results similar to a global regression, and spatial
+non-stationarity can not be explored.
-<h2>BUGS</h2>
+<h4>Kernel functions</h4>
+The kernel function has little influence on the result, much more important is the
+bandwidth. Available kernel funtions to calculate weights are
-The estimated bandwidth is too small. A correcting factor has been
-introduced, but this is just a workaround.
+<dl>
+<dt><b>Epanechnikov</b></dt>
+<dd>w = 1 - d / bw</dd>
+<dt><b>Bisquare (Quartic)</b></dt>
+<dd>w = (1 - (d / bw)<sup>2</sup>)<sup>2</sup></dd>
+<dt><b>Tricubic</b></dt>
+<dd>w = (1 - (d / bw)<sup>3</sup>)<sup>3</sup></dd>
+<dt><b>Gaussian</b></dt>
+<dd>w = exp(-0.5 * (d / bw)<sup>2</sup>)</dd>
+</dl>
+with<br>
+w = weight for current cell<br>
+d = distance to the current cell<br>
+bw = bandwidth
+
+<h2>REFERENCES</h2>
+
+Brunsdon, C., Fotheringham, A.S., and Charlton, M.E., 1996,
+Geographically Weighted Regression: A Method for Exploring Spatial
+Nonstationarity, Geographical Analysis, 28(4), 281- 298<br>
+Fotheringham, A.S., Brunsdon, C., and Charlton, M.E., 2002,
+Geographically Weighted Regression: The Analysis of Spatially Varying
+Relationships, Chichester: Wiley.<br>
+
+<h2>SEE ALSO</h2>
+
+<a href="http://geoinformatics.wp.st-andrews.ac.uk/gwr/">http://geoinformatics.wp.st-andrews.ac.uk/gwr/</a>
+<br>
+<a href="http://gwr.nuim.ie/">http://gwr.nuim.ie/</a>
+<br>
+R package
+<a href="http://cran.rstudio.com/web/packages/spgwr/index.html">spgwr</a>
+
<h2>AUTHOR</h2>
Markus Metz
Modified: grass-addons/grass7/raster/r.gwr/weights.c
===================================================================
--- grass-addons/grass7/raster/r.gwr/weights.c 2014-06-28 16:46:15 UTC (rev 61032)
+++ grass-addons/grass7/raster/r.gwr/weights.c 2014-06-28 18:57:00 UTC (rev 61033)
@@ -8,7 +8,7 @@
* each cell within bw (distance from center <= bw)
* gets a weight > 0 */
-static double vf = 2;
+static double vf = -0.5;
double ** (*w_fn) (int);
@@ -18,7 +18,7 @@
double bw2, bw2d, d, **w;
w = G_alloc_matrix(bw * 2 + 1, bw * 2 + 1);
- bw2 = (bw + 1) * (bw + 1);
+ bw2 = (bw + 1);
bw2d = bw * bw;
for (r = -bw; r <= bw; r++) {
@@ -35,7 +35,7 @@
return w;
}
-double **quartic(int bw)
+double **bisquare(int bw)
{
int r, c;
double bw2, bw2d, d, **w, t;
@@ -101,25 +101,24 @@
if (d <= bw2) {
w[r + bw][c + bw] = exp(vf * d / bw2);
- w[r + bw][c + bw] = 1;
}
}
}
-
+
return w;
}
/* set weighing kernel function and variance factor */
void set_wfn(char *name, int vfu)
{
- vf = vfu / 2.;
+ vf = vfu / -2.;
if (*name == 'g')
w_fn = gauss;
else if (*name == 'e')
w_fn = epanechnikov;
- else if (*name == 'q')
- w_fn = quartic;
+ else if (*name == 'b')
+ w_fn = bisquare;
else if (*name == 't')
w_fn = tricubic;
else
More information about the grass-commit
mailing list