[GRASS-SVN] r48669 - grass-addons/grass7/raster/r.regression.multi
svn_grass at osgeo.org
svn_grass at osgeo.org
Fri Oct 7 05:24:50 EDT 2011
Author: mmetz
Date: 2011-10-07 02:24:50 -0700 (Fri, 07 Oct 2011)
New Revision: 48669
Added:
grass-addons/grass7/raster/r.regression.multi/r.regression.multi.html
Removed:
grass-addons/grass7/raster/r.regression.multi/r.regression.line.html
Modified:
grass-addons/grass7/raster/r.regression.multi/Makefile
grass-addons/grass7/raster/r.regression.multi/main.c
Log:
multiple regression with raster maps
Modified: grass-addons/grass7/raster/r.regression.multi/Makefile
===================================================================
--- grass-addons/grass7/raster/r.regression.multi/Makefile 2011-10-07 09:22:02 UTC (rev 48668)
+++ grass-addons/grass7/raster/r.regression.multi/Makefile 2011-10-07 09:24:50 UTC (rev 48669)
@@ -1,6 +1,6 @@
MODULE_TOPDIR = ../..
-PGM = r.regression.line
+PGM = r.regression.multi
LIBES = $(RASTERLIB) $(GISLIB) $(MATHLIB)
DEPENDENCIES = $(GISDEP) $(RASTERDEP)
Modified: grass-addons/grass7/raster/r.regression.multi/main.c
===================================================================
--- grass-addons/grass7/raster/r.regression.multi/main.c 2011-10-07 09:22:02 UTC (rev 48668)
+++ grass-addons/grass7/raster/r.regression.multi/main.c 2011-10-07 09:24:50 UTC (rev 48669)
@@ -1,15 +1,14 @@
/****************************************************************************
*
- * MODULE: r.regression.line
+ * MODULE: r.regression.multi
*
- * AUTHOR(S): Dr. Agustin Lobo
- * Markus Metz (conversion to C for speed)
+ * AUTHOR(S): Markus Metz
*
- * PURPOSE: Calculates linear regression from two raster maps:
- * y = a + b*x
+ * PURPOSE: Calculates multiple linear regression from raster maps:
+ * y = b0 + b1*x1 + b2*x2 + ... + bn*xn + e
*
- * COPYRIGHT: (C) 2010 by the GRASS Development Team
+ * COPYRIGHT: (C) 2011 by the GRASS Development Team
*
* This program is free software under the GNU General Public
* License (>=v2). Read the file COPYING that comes with GRASS
@@ -25,17 +24,99 @@
#include <grass/glocale.h>
#include <grass/raster.h>
+struct MATRIX
+{
+ int n; /* SIZE OF THIS MATRIX (N x N) */
+ double *v;
+};
+
+#define M(m,row,col) (m)->v[((row) * ((m)->n)) + (col)]
+
+static int solvemat(struct MATRIX *m, double a[], double B[])
+{
+ int i, j, i2, j2, imark;
+ double factor, temp;
+ double pivot; /* ACTUAL VALUE OF THE LARGEST PIVOT CANDIDATE */
+
+ for (i = 0; i < m->n; i++) {
+ j = i;
+
+ /* find row with largest magnitude value for pivot value */
+
+ pivot = M(m, i, j);
+ imark = i;
+ for (i2 = i + 1; i2 < m->n; i2++) {
+ temp = fabs(M(m, i2, j));
+ if (temp > fabs(pivot)) {
+ pivot = M(m, i2, j);
+ imark = i2;
+ }
+ }
+
+ /* if the pivot is very small then the points are nearly co-linear */
+ /* co-linear points result in an undefined matrix, and nearly */
+ /* co-linear points result in a solution with rounding error */
+
+ if (pivot == 0.0) {
+ G_fatal_error(_("Matrix is unsolvable"));
+ }
+
+ /* if row with highest pivot is not the current row, switch them */
+
+ if (imark != i) {
+ for (j2 = 0; j2 < m->n; j2++) {
+ temp = M(m, imark, j2);
+ M(m, imark, j2) = M(m, i, j2);
+ M(m, i, j2) = temp;
+ }
+
+ temp = a[imark];
+ a[imark] = a[i];
+ a[i] = temp;
+ }
+
+ /* compute zeros above and below the pivot, and compute
+ values for the rest of the row as well */
+
+ for (i2 = 0; i2 < m->n; i2++) {
+ if (i2 != i) {
+ factor = M(m, i2, j) / pivot;
+ for (j2 = j; j2 < m->n; j2++)
+ M(m, i2, j2) -= factor * M(m, i, j2);
+ a[i2] -= factor * a[i];
+ }
+ }
+ }
+
+ /* SINCE ALL OTHER VALUES IN THE MATRIX ARE ZERO NOW, CALCULATE THE
+ COEFFICIENTS BY DIVIDING THE COLUMN VECTORS BY THE DIAGONAL VALUES. */
+
+ for (i = 0; i < m->n; i++) {
+ B[i] = a[i] / M(m, i, i);
+ }
+
+ return 1;
+}
+
int main(int argc, char *argv[])
{
- unsigned int r, c, rows, cols; /* totals */
- int map1_fd, map2_fd;
- double sumX, sumY, sumsqX, sumsqY, sumXY;
- double meanX, meanY, varX, varY, sdX, sdY;
- double A, B, R, F;
- long count = 0;
- DCELL *map1_buf, *map2_buf, map1_val, map2_val;
+ unsigned int r, c, rows, cols, n_valid; /* totals */
+ int *mapx_fd, mapy_fd, mapres_fd;
+ int i, j, k, n_predictors;
+ double *sumX, sumY, *sumsqX, sumsqY, *sumXY;
+ double *meanX, meanY, *varX, varY, *sdX, sdY;
+ double yest, yres; /* estimated y, residual */
+ double sumYest, *SSerr_without;
+ double SE;
+ double meanYest, meanYres, varYest, varYres, sdYest, sdYres;
+ double SStot, SSerr, SSreg;
+ double **a;
+ struct MATRIX *m, *m_all;
+ double **B, Rsq, Rsqadj, F, t, AIC, AICc, BIC;
+ unsigned int count = 0;
+ DCELL **mapx_buf, *mapy_buf, *mapx_val, mapy_val, *mapres_buf;
char *name;
- struct Option *input_map1, *input_map2, *output_opt;
+ struct Option *input_mapx, *input_mapy, *output_res, *output_opt;
struct Flag *shell_style;
struct Cell_head region;
struct GModule *module;
@@ -46,17 +127,22 @@
G_add_keyword(_("raster"));
G_add_keyword(_("statistics"));
module->description =
- _("Calculates linear regression from two raster maps: y = a + b*x.");
+ _("Calculates multiple linear regression from raster maps.");
/* Define the different options */
- input_map1 = G_define_standard_option(G_OPT_R_MAP);
- input_map1->key = "map1";
- input_map1->description = (_("Map for x coefficient"));
+ input_mapx = G_define_standard_option(G_OPT_R_INPUTS);
+ input_mapx->key = "mapx";
+ input_mapx->description = (_("Map for x coefficient"));
- input_map2 = G_define_standard_option(G_OPT_R_MAP);
- input_map2->key = "map2";
- input_map2->description = (_("Map for y coefficient"));
+ input_mapy = G_define_standard_option(G_OPT_R_INPUT);
+ input_mapy->key = "mapy";
+ input_mapy->description = (_("Map for y coefficient"));
+ output_res = G_define_standard_option(G_OPT_R_OUTPUT);
+ output_res->key = "residuals";
+ output_res->required = NO;
+ output_res->description = (_("Map to store residuals"));
+
output_opt = G_define_standard_option(G_OPT_F_OUTPUT);
output_opt->key = "output";
output_opt->required = NO;
@@ -81,79 +167,366 @@
rows = region.rows;
cols = region.cols;
+ /* count x maps */
+ for (i = 0; input_mapx->answers[i]; i++);
+ n_predictors = i;
+
+ /* allocate memory for x maps */
+ mapx_fd = (int *)G_malloc(n_predictors * sizeof(int));
+ sumX = (double *)G_malloc(n_predictors * sizeof(double));
+ sumsqX = (double *)G_malloc(n_predictors * sizeof(double));
+ sumXY = (double *)G_malloc(n_predictors * sizeof(double));
+ SSerr_without = (double *)G_malloc(n_predictors * sizeof(double));
+ meanX = (double *)G_malloc(n_predictors * sizeof(double));
+ varX = (double *)G_malloc(n_predictors * sizeof(double));
+ sdX = (double *)G_malloc(n_predictors * sizeof(double));
+ mapx_buf = (DCELL **)G_malloc(n_predictors * sizeof(DCELL *));
+ mapx_val = (DCELL *)G_malloc((n_predictors + 1) * sizeof(DCELL));
+
+ /* ordinary least squares */
+ m = NULL;
+ m_all = (struct MATRIX *)G_malloc((n_predictors + 1) * sizeof(struct MATRIX));
+ a = (double **)G_malloc((n_predictors + 1) * sizeof(double *));
+ B = (double **)G_malloc((n_predictors + 1) * sizeof(double *));
+
+ m = &(m_all[0]);
+ m->n = n_predictors + 1;
+ m->v = (double *)G_malloc(m->n * m->n * sizeof(double));
+
+ a[0] = (double *)G_malloc(m->n * sizeof(double));
+ B[0] = (double *)G_malloc(m->n * sizeof(double));
+
+ for (i = 0; i < m->n; i++) {
+ for (j = i; j < m->n; j++)
+ M(m, i, j) = 0.0;
+ a[0][i] = 0.0;
+ B[0][i] = 0.0;
+ }
+
+ for (k = 1; k <= n_predictors; k++) {
+ m = &(m_all[k]);
+ m->n = n_predictors;
+ m->v = (double *)G_malloc(m->n * m->n * sizeof(double));
+ a[k] = (double *)G_malloc(m->n * sizeof(double));
+ B[k] = (double *)G_malloc(m->n * sizeof(double));
+
+ for (i = 0; i < m->n; i++) {
+ for (j = i; j < m->n; j++)
+ M(m, i, j) = 0.0;
+ a[k][i] = 0.0;
+ B[k][i] = 0.0;
+ }
+ }
+
/* open maps */
- map1_fd = Rast_open_old(input_map1->answer, "");
- map2_fd = Rast_open_old(input_map2->answer, "");
+ G_debug(1, "open maps");
+ for (i = 0; i < n_predictors; i++) {
+ mapx_fd[i] = Rast_open_old(input_mapx->answers[i], "");
+ }
+ mapy_fd = Rast_open_old(input_mapy->answer, "");
- map1_buf = Rast_allocate_d_buf();
- map2_buf = Rast_allocate_d_buf();
+ /* residuals output */
+ if (output_res->answer) {
+ mapres_fd = Rast_open_new(output_res->answer, DCELL_TYPE);
+ mapres_buf = Rast_allocate_d_buf();
+ }
+ else {
+ mapres_fd = -1;
+ mapres_buf = NULL;
+ }
- sumX = sumY = sumsqX = sumsqY = sumXY = 0.0;
- meanX = meanY = varX = varY = sdX = sdY = 0.0;
+ for (i = 0; i < n_predictors; i++)
+ mapx_buf[i] = Rast_allocate_d_buf();
+ mapy_buf = Rast_allocate_d_buf();
+
+ for (i = 0; i < n_predictors; i++) {
+ sumX[i] = sumsqX[i] = sumXY[i] = 0.0;
+ meanX[i] = varX[i] = sdX[i] = 0.0;
+ SSerr_without[i] = 0.0;
+ }
+ sumY = sumsqY = meanY = varY = sdY = 0.0;
+ sumYest = meanYest = varYest = sdYest = 0.0;
+ meanYres = varYres = sdYres = 0.0;
+
+ /* read input maps */
+ G_message(_("First pass..."));
+ n_valid = 0;
+ mapx_val[0] = 1.0;
for (r = 0; r < rows; r++) {
G_percent(r, rows, 2);
- Rast_get_d_row(map1_fd, map1_buf, r);
- Rast_get_d_row(map2_fd, map2_buf, r);
+
+ for (i = 0; i < n_predictors; i++)
+ Rast_get_d_row(mapx_fd[i], mapx_buf[i], r);
+
+ Rast_get_d_row(mapy_fd, mapy_buf, r);
+
for (c = 0; c < cols; c++) {
- map1_val = map1_buf[c];
- map2_val = map2_buf[c];
- if (Rast_is_d_null_value(&map1_val) ||
- Rast_is_d_null_value(&map2_val))
+ int isnull = 0;
+
+ for (i = 0; i < n_predictors; i++) {
+ mapx_val[i + 1] = mapx_buf[i][c];
+ if (Rast_is_d_null_value(&(mapx_val[i + 1]))) {
+ isnull = 1;
+ break;
+ }
+ }
+ if (isnull)
continue;
- sumX += map1_val;
- sumY += map2_val;
- sumsqX += map1_val * map1_val;
- sumsqY += map2_val * map2_val;
- sumXY += map1_val * map2_val;
+ mapy_val = mapy_buf[c];
+ if (Rast_is_d_null_value(&mapy_val))
+ continue;
+
+ for (i = 0; i <= n_predictors; i++) {
+ double val1 = mapx_val[i];
+
+ for (j = i; j <= n_predictors; j++) {
+ double val2 = mapx_val[j];
+
+ m = &(m_all[0]);
+ M(m, i, j) += val1 * val2;
+
+ /* linear model without predictor k */
+ for (k = 1; k <= n_predictors; k++) {
+ if (k != i && k != j) {
+ int i2 = k > i ? i : i - 1;
+ int j2 = k > j ? j : j - 1;
+
+ m = &(m_all[k]);
+ M(m, i2, j2) += val1 * val2;
+ }
+ }
+ }
+
+ a[0][i] += mapy_val * val1;
+ for (k = 1; k <= n_predictors; k++) {
+ if (k != i) {
+ int i2 = k > i ? i : i - 1;
+
+ a[k][i2] += mapy_val * val1;
+ }
+ }
+
+ if (i > 0) {
+ sumX[i - 1] += val1;
+ sumsqX[i - 1] += val1 * val1;
+ sumXY[i - 1] += val1 * mapy_val;
+ }
+ }
+
+ sumY += mapy_val;
+ sumsqY += mapy_val * mapy_val;
count++;
}
}
- Rast_close(map1_fd);
- Rast_close(map2_fd);
- G_free(map1_buf);
- G_free(map2_buf);
+ G_percent(rows, rows, 2);
+
+ if (count < n_predictors + 1)
+ G_fatal_error(_("Not enough valid cells available"));
- B = (sumXY - sumX * sumY / count) / (sumsqX - sumX * sumX / count);
- R = (sumXY - sumX * sumY / count) /
- sqrt((sumsqX - sumX * sumX / count) * (sumsqY - sumY * sumY / count));
+ /* TRANSPOSE VALUES IN UPPER HALF OF M TO OTHER HALF */
- meanX = sumX / count;
- sumsqX = sumsqX / count;
- varX = sumsqX - (meanX * meanX);
- sdX = sqrt(varX);
+ for (k = 0; k <= n_predictors; k++) {
+ m = &(m_all[k]);
+ for (i = 1; i < m->n; i++)
+ for (j = 0; j < i; j++)
+ M(m, i, j) = M(m, j, i);
+ solvemat(m, a[k], B[k]);
+ }
+
+ /* second pass */
+ G_message(_("Second pass..."));
+
+ for (i = 0; i < n_predictors; i++)
+ meanX[i] = sumX[i] / count;
+
meanY = sumY / count;
- sumsqY = sumsqY / count;
- varY = sumsqY - (meanY * meanY);
- sdY = sqrt(varY);
+ SStot = SSerr = SSreg = 0.0;
+ for (r = 0; r < rows; r++) {
+ G_percent(r, rows, 2);
- A = meanY - B * meanX;
- F = R * R / (1 - R * R / count - 2);
+ for (i = 0; i < n_predictors; i++)
+ Rast_get_d_row(mapx_fd[i], mapx_buf[i], r);
- if (shell_style->answer) {
- fprintf(stdout, "a=%f\n", A);
- fprintf(stdout, "b=%f\n", B);
- fprintf(stdout, "R=%f\n", R);
- fprintf(stdout, "N=%ld\n", count);
- fprintf(stdout, "F=%f\n", F);
- fprintf(stdout, "meanX=%f\n", meanX);
- fprintf(stdout, "sdX=%f\n", sdX);
- fprintf(stdout, "meanY=%f\n", meanY);
- fprintf(stdout, "sdY=%f\n", sdY);
+ Rast_get_d_row(mapy_fd, mapy_buf, r);
+
+ if (mapres_buf)
+ Rast_set_d_null_value(mapres_buf, cols);
+
+ for (c = 0; c < cols; c++) {
+ int isnull = 0;
+
+ for (i = 0; i < n_predictors; i++) {
+ mapx_val[i + 1] = mapx_buf[i][c];
+ if (Rast_is_d_null_value(&(mapx_val[i + 1]))) {
+ isnull = 1;
+ break;
+ }
+ }
+ if (isnull)
+ continue;
+
+ mapy_val = mapy_buf[c];
+ if (Rast_is_d_null_value(&mapy_val))
+ continue;
+
+ yest = 0.0;
+ for (i = 0; i <= n_predictors; i++) {
+ yest += B[0][i] * mapx_val[i];
+ }
+ yres = mapy_val - yest;
+ if (mapres_buf)
+ mapres_buf[c] = yres;
+
+ SStot += (mapy_val - meanY) * (mapy_val - meanY);
+ SSreg += (yest - meanY) * (yest - meanY);
+ SSerr += yres * yres;
+
+ for (k = 1; k <= n_predictors; k++) {
+ double yesti = 0.0;
+ double yresi;
+
+ /* linear model without predictor k */
+ for (i = 0; i <= n_predictors; i++) {
+ if (i != k) {
+ j = k > i ? i : i - 1;
+ yesti += B[k][j] * mapx_val[i];
+ }
+ }
+ yresi = mapy_val - yesti;
+
+ /* linear model without predictor k */
+ SSerr_without[k - 1] += yresi * yresi;
+
+ varX[k - 1] = (mapx_val[k] - meanX[k - 1]) * (mapx_val[k] - meanX[k - 1]);
+ }
+ }
+
+ if (mapres_buf)
+ Rast_put_d_row(mapres_fd, mapres_buf);
}
- else {
- fprintf(stdout, "y = a + b*x\n");
- fprintf(stdout, " a (Offset): %f\n", A);
- fprintf(stdout, " b (Gain): %f\n", B);
- fprintf(stdout, " R (sumXY - sumX*sumY/N): %f\n", R);
- fprintf(stdout, " N (Number of elements): %ld\n", count);
- fprintf(stdout, " F (F-test significance): %f\n", F);
- fprintf(stdout, " meanX (Mean of map1): %f\n", meanX);
- fprintf(stdout, " sdX (Standard deviation of map1): %f\n", sdX);
- fprintf(stdout, " meanY (Mean of map2): %f\n", meanY);
- fprintf(stdout, " sdY (Standard deviation of map2): %f\n", sdY);
+ G_percent(rows, rows, 2);
+
+ fprintf(stdout, "n=%d\n", count);
+ /* coefficient of determination aka R squared */
+ Rsq = 1 - (SSerr / SStot);
+ fprintf(stdout, "Rsq=%f\n", Rsq);
+ /* adjusted coefficient of determination */
+ Rsqadj = 1 - ((SSerr * (count - 1)) / (SStot * (count - n_predictors - 1)));
+ fprintf(stdout, "Rsqadj=%f\n", Rsqadj);
+ /* F statistic */
+ /* F = ((SStot - SSerr) / (n_predictors)) / (SSerr / (count - n_predictors));
+ * , or: */
+ F = ((SStot - SSerr) * (count - n_predictors)) / (SSerr * n_predictors);
+ fprintf(stdout, "F=%f\n", F);
+
+ i = 0;
+ /* constant aka estimate for intercept in R */
+ fprintf(stdout, "b%d=%f\n", i, B[0][i]);
+ /* t score for R squared of the full model, unused */
+ t = sqrt(Rsq) * sqrt((count - 2) / (1 - Rsq));
+ /*
+ fprintf(stdout, "t%d=%f\n", i, t);
+ */
+
+ /* AIC, corrected AIC, and BIC information criteria for the full model */
+ AIC = count * log(SSerr / count) + 2 * (n_predictors + 1);
+ fprintf(stdout, "AIC=%f\n", AIC);
+ AICc = AIC + (2 * n_predictors * (n_predictors + 1)) / (count - n_predictors - 1);
+ fprintf(stdout, "AICc=%f\n", AICc);
+ BIC = count * log(SSerr / count) + log(count) * (n_predictors + 1);
+ fprintf(stdout, "BIC=%f\n", BIC);
+
+ /* error variance of the model, identical to R */
+ SE = SSerr / (count - n_predictors - 1);
+ /*
+ fprintf(stdout, "SE=%f\n", SE);
+ fprintf(stdout, "SSerr=%f\n", SSerr);
+ */
+
+ for (i = 0; i < n_predictors; i++) {
+
+ fprintf(stdout, "\nb%d=%f\n", i + 1, B[0][i + 1]);
+ if (n_predictors > 1) {
+ double Rsqi, SEi, sumsqX_corr;
+
+ /* corrected sum of squares for predictor [i] */
+ sumsqX_corr = sumsqX[i] - sumX[i] * sumX[i] / (count - n_predictors - 1);
+
+ /* standard error SE for predictor [i] */
+
+ /* SE[i] with only one predictor: sqrt(SE / sumsqX_corr)
+ * this does not work with more than one predictor */
+ /* in R, SEi is sqrt(diag(R) * resvar) with
+ * R = ???
+ * resvar = rss / rdf = SE global
+ * rss = sum of squares of the residuals
+ * rdf = residual degrees of freedom = count - n_predictors - 1 */
+ SEi = sqrt(SE / (Rsq * sumsqX_corr));
+ /*
+ fprintf(stdout, "SE%d=%f\n", i + 1, SEi);
+ */
+
+ /* Sum of squares for predictor [i] */
+ /*
+ fprintf(stdout, "SSerr%d=%f\n", i + 1, SSerr_without[i] - SSerr);
+ */
+
+ /* R squared of the model without predictor [i] */
+ /* Rsqi = 1 - SSerr_without[i] / SStot; */
+ /* the additional amount of variance explained
+ * when including predictor [i] :
+ * Rsq - Rsqi */
+ Rsqi = (SSerr_without[i] - SSerr) / SStot;
+ fprintf(stdout, "Rsq%d=%f\n", i + 1, Rsqi);
+
+ /* t score for Student's t distribution, unused */
+ t = (B[0][i + 1]) / SEi;
+ /*
+ fprintf(stdout, "t%d=%f\n", i + 1, t);
+ */
+
+ /* F score for Fisher's F distribution
+ * here: F score to test if including predictor [i]
+ * yields a significant improvement
+ * after Lothar Sachs, Angewandte Statistik:
+ * F = (Rsq - Rsqi) * (count - n_predictors - 1) / (1 - Rsq) */
+ /* same like Sumsq / SE */
+ /* same like (SSerr_without[i] / SSerr - 1) * (count - n_predictors - 1) */
+ /* same like R-stats when entered in R-stats as last predictor */
+ F = (SSerr_without[i] / SSerr - 1) * (count - n_predictors - 1);
+ fprintf(stdout, "F%d=%f\n", i + 1, F);
+
+ /* AIC, corrected AIC, and BIC information criteria for
+ * the model without predictor [i] */
+ AIC = count * log(SSerr_without[i] / count) + 2 * (n_predictors);
+ fprintf(stdout, "AIC%d=%f\n", i + 1, AIC);
+ AICc = AIC + (2 * (n_predictors - 1) * n_predictors) / (count - n_predictors - 2);
+ fprintf(stdout, "AICc%d=%f\n", i + 1, AICc);
+ BIC = count * log(SSerr_without[i] / count) + (n_predictors - 1) * log(count);
+ fprintf(stdout, "BIC%d=%f\n", i + 1, BIC);
+ }
}
+
+ for (i = 0; i < n_predictors; i++) {
+ Rast_close(mapx_fd[i]);
+ G_free(mapx_buf[i]);
+ }
+ Rast_close(mapy_fd);
+ G_free(mapy_buf);
+
+ if (mapres_fd > -1) {
+ struct History history;
+
+ Rast_close(mapres_fd);
+ G_free(mapres_buf);
+
+ Rast_short_history(output_res->answer, "raster", &history);
+ Rast_command_history(&history);
+ Rast_write_history(output_res->answer, &history);
+ }
+
exit(EXIT_SUCCESS);
}
Deleted: grass-addons/grass7/raster/r.regression.multi/r.regression.line.html
===================================================================
--- grass-addons/grass7/raster/r.regression.multi/r.regression.line.html 2011-10-07 09:22:02 UTC (rev 48668)
+++ grass-addons/grass7/raster/r.regression.multi/r.regression.line.html 2011-10-07 09:24:50 UTC (rev 48669)
@@ -1,48 +0,0 @@
-<h2>DESCRIPTION</h2>
-
-<em>r.regression.line</em> Calculates linear regression from two raster maps,
-according to the formula y = a + b*x, where x and y represent raster maps.
-Optionally saves regression coefficients to an ASCII file.
-The result includes the following coefficients:
-offset/intercept (a) and gain/slope (b), correlation coefficient (R),
-number of elements (N), means (medX, medY), standard deviations
-(sdX, sdY), and the F test for testing the significance of the
-regression model as a whole (F).
-
-<h2>NOTES</h2>
-The results for offset/intercept (a) and gain/slope (b) are
-identical to that obtained from R-stats's lm() function.
-
-<h2>EXAMPLE</h2>
-
-Comparison of the old and the new DEM in Spearfish:
-<div class="code"><pre>
-g.region rast=elevation.10m -p
-r.regression.line map1=elevation.dem map2=elevation.10m
-</pre></div>
-<p>
-
-Using the script style flag AND <em>eval</em> to make results
-available in the shell:
-<div class="code"><pre>
-g.region rast=elevation.10m -p
-eval `r.regression.line -g map1=elevation.dem map2=elevation.10m`
-echo $a
-479.615
-
-echo $b
-0.645631
-
-echo $R
-0.804441
-</pre></div>
-
-
-<h2>AUTHOR</h2>
-
-Dr. Agustin Lobo - alobo at ija.csic.es<br>
-Updated to GRASS 5.7 Michael Barton, Arizona State University<br>
-Script style output Markus Neteler<br>
-Conversion to C module Markus Metz
-
-<p><i>Last changed: $Date$</i>
Copied: grass-addons/grass7/raster/r.regression.multi/r.regression.multi.html (from rev 48668, grass-addons/grass7/raster/r.regression.multi/r.regression.line.html)
===================================================================
--- grass-addons/grass7/raster/r.regression.multi/r.regression.multi.html (rev 0)
+++ grass-addons/grass7/raster/r.regression.multi/r.regression.multi.html 2011-10-07 09:24:50 UTC (rev 48669)
@@ -0,0 +1,74 @@
+<h2>DESCRIPTION</h2>
+
+<em>r.regression.multi</em> calculates a multiple linear regression from
+raster maps, according to the formula
+<div class="code"><pre>
+Y = b0 + sum(bi*Xi) + E
+</pre></div>
+where
+<div class="code"><pre>
+X = {X1, X2, ..., Xm}
+m = number of explaining variables
+Y = {y1, y2, ..., yn}
+Xi = {xi1, xi2, ..., xin}
+E = {e1, e2, ..., en}
+n = number of observations (cases)
+</pre></div>
+
+In R notation:
+<div class="code"><pre>
+Y ~ sum(bi*Xi)
+b0 is the intercept, X0 is set to 1
+</pre></div>
+
+<p>
+<em>r.regression.multi</em> is designed for large datasets that cannot
+be processed in R. A p value is therefore not provided, because even
+very small, meaningless effects will become significant with a large
+number of cells. Instead it is recommended to judge by the estimator b,
+the amount of variance explained (R squared for a given variable) and
+the gain in AIC (AIC without a given variable minus AIC global must be
+positive) whether the inclusion of a given explaining variable in the
+model is justified.
+
+<h4>The global model</h4>
+The <em>b</em> coefficients (b0 is offset), R squared or coefficient of
+determination (Rsq) and F are identical to the ones obtained from
+R-stats's lm() function and R-stats's anova() function. The AIC value
+is identical to the one obtained from R-stats's stepAIC() function
+(in case of backwards stepping, identical to the Start value). The
+AIC value corrected for the number of explaining variables and the BIC
+(Bayesian Information Criterion) value follow the logic of AIC.
+
+<h4>The explaining variables</h4>
+R squared for each explaining variable represents the additional amount
+of explained variance when including this variable compared to when
+excluding this variable, that is, this amount of variance is explained
+by the current explaining variable after taking into consideration all
+the other explaining variables.
+<p>
+The F score for each explaining variable tests whether the inclusion
+of this variable significantly increases the explaining power of the
+model, relative to the global model excluding this explaining variable.
+That means that the F value for a given explaining variable is only
+identical to the F value of the R-function <em>summary.aov</em> if the
+given explaining variable is the last variable in the R-formula. While
+R successively includes one variable after another in the order
+specified by the formula and at each step calculates the F value
+expressing the gain by including the current variable in addition to the
+previous variables, <em>r.regression.multi</em> calculates the F-value
+expressing the gain by including the current variable in addition to all
+other variables, not only the previous variables.
+<p>
+The AIC value is identical to the one obtained from the R-function
+stepAIC() when excluding this variable from the full model. The AIC
+value corrected for the number of explaining variables and the BIC
+(Bayesian Information Criterion) value follow the logic of AIC. BIC is
+identical to the R-function stepAIC with k = log(n). AICc is not
+available through the R-function stepAIC.
+
+<h2>AUTHOR</h2>
+
+Markus Metz
+
+<p><i>Last changed: $Date$</i>
More information about the grass-commit
mailing list