[GRASS-SVN] r49129 - grass/branches/develbranch_6/imagery/i.pca

svn_grass at osgeo.org svn_grass at osgeo.org
Tue Nov 8 04:22:01 EST 2011


Author: mmetz
Date: 2011-11-08 01:22:01 -0800 (Tue, 08 Nov 2011)
New Revision: 49129

Modified:
   grass/branches/develbranch_6/imagery/i.pca/description.html
   grass/branches/develbranch_6/imagery/i.pca/main.c
Log:
i.pca: backport NULL/MASK fixes and normalizing option

Modified: grass/branches/develbranch_6/imagery/i.pca/description.html
===================================================================
--- grass/branches/develbranch_6/imagery/i.pca/description.html	2011-11-08 09:01:29 UTC (rev 49128)
+++ grass/branches/develbranch_6/imagery/i.pca/description.html	2011-11-08 09:22:01 UTC (rev 49129)
@@ -32,6 +32,13 @@
 
 <DD>If output is rescaled, the output raster will be of type CELL. If 
 the output is not rescaled, the output raster will be of type DCELL.
+
+<DT><B>-n</B> <EM>normalize input raster maps</EM>
+<DD>By default, the values of the input raster maps are centered for each
+map separately with <EM>x - mean</EM>. With <EM>-n</EM>, the input raster
+maps are normalized for each map separately with <EM>(x - mean) / stddev</EM>.
+Normalizing is highly recommended when the input raster maps have different
+units, e.g. represent different environmental parameters.
 </DL>
 
 
@@ -42,7 +49,7 @@
 region in Australia.
 <P>
 Eigenvalue and eigenvector information is stored in the output maps'
-history files. View with <em>r.info</em>.
+history files. View with <em>r.info -h</em>.
 
 
 <H2>EXAMPLE</H2>
@@ -54,9 +61,9 @@
 
 r.info -h spot_pca.1
    Eigen values, (vectors), and [percent importance]:
-   PC1   1170.12 ( -0.63 -0.65 -0.43 ) [ 88.07% ]
-   PC2    152.49 (  0.23  0.37 -0.90 ) [ 11.48% ]
-   PC3      6.01 (  0.75 -0.66 -0.08 ) [  0.45% ]
+   PC1   1159.75 ( 0.6237, 0.6503, 0.4337) [88.38%]
+   PC2    146.50 ( 0.2403, 0.3685,-0.8981) [11.16%]
+   PC3      5.97 (-0.7438, 0.6643, 0.0735) [ 0.45%]
 </pre></div>
 
 

Modified: grass/branches/develbranch_6/imagery/i.pca/main.c
===================================================================
--- grass/branches/develbranch_6/imagery/i.pca/main.c	2011-11-08 09:01:29 UTC (rev 49128)
+++ grass/branches/develbranch_6/imagery/i.pca/main.c	2011-11-08 09:22:01 UTC (rev 49129)
@@ -33,9 +33,10 @@
 /* function prototypes */
 static CELL round_c(double);
 static int set_output_scale(struct Option *, int *, int *, int *);
-static int calc_mu(int *, double *, int);
-static int calc_covariance(int *, double **, double *, int);
-static int write_pca(double **, int *, char *, int, int, int, int);
+static int calc_mu(int *, double *, double *, int);
+static int calc_covariance(int *, double **, double *, double *, int);
+static int write_pca(double **, double *, double *, int *, char *, int,
+                     int, int, int);
 
 #ifdef PCA_DEBUG
 static int dump_eigen(int, double **, double *);
@@ -44,9 +45,10 @@
 
 int main(int argc, char *argv[])
 {
-    int i, j;			/* Loop control variables */
+    int i;			/* Loop control variables */
     int bands;			/* Number of image bands */
     double *mu;			/* Mean vector for image bands */
+    double *stddev;		/* Stddev vector for image bands */
     double **covar;		/* Covariance Matrix */
     double *eigval;
     double **eigmat;
@@ -55,6 +57,7 @@
 
     struct GModule *module;
     struct Option *opt_in, *opt_out, *opt_scale;
+    struct Flag *flag_norm;
 
     /* initialize GIS engine */
     G_gisinit(argv[0]);
@@ -89,6 +92,10 @@
 	_("For no rescaling use 0,0");
     opt_scale->guisection = _("Rescale");
     
+    flag_norm = G_define_flag();
+    flag_norm->key = 'n';
+    flag_norm->description = (_("Normalize (center and scale) input maps"));
+    
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
@@ -113,7 +120,11 @@
     inp_fd = G_alloc_ivector(bands);
     eigmat = G_alloc_matrix(bands, bands);
     eigval = G_alloc_vector(bands);
- 
+    
+    if (flag_norm->answer)
+	stddev = G_alloc_vector(bands);
+    else
+	stddev = NULL;
 
     /* open and check input/output files */
     for (i = 0; i < bands; i++) {
@@ -132,19 +143,11 @@
     }
 
     G_verbose_message(_("Calculating covariance matrix..."));
-    calc_mu(inp_fd, mu, bands);
+    if (!calc_mu(inp_fd, mu, stddev, bands))
+	G_fatal_error(_("No non-null values"));
 
-    calc_covariance(inp_fd, covar, mu, bands);
+    calc_covariance(inp_fd, covar, mu, stddev, bands);
 
-    for (i = 0; i < bands; i++) {
-	for (j = 0; j < bands; j++) {
-	    covar[i][j] =
-		covar[i][j] /
-		((double)((G_window_rows() * G_window_cols()) - 1));
-	    G_debug(3, "covar[%d][%d] = %f", i, j, covar[i][j]);
-	}
-    }
-
     G_math_d_copy(covar[0], eigmat[0], bands*bands);
     G_debug(1, "Calculating eigenvalues and eigenvectors...");
     G_math_eigen(eigmat, eigval, bands);
@@ -161,8 +164,8 @@
     G_math_d_A_T(eigmat, bands);
 
     /* write output images */
-    write_pca(eigmat, inp_fd, opt_out->answer, bands, scale, scale_min,
-	      scale_max);
+    write_pca(eigmat, mu, stddev, inp_fd, opt_out->answer, bands,
+              scale, scale_min, scale_max);
 
     /* write colors and history to output */
     for (i = 0; i < bands; i++) {
@@ -176,12 +179,14 @@
 	/* close output file */
 	G_unopen_cell(inp_fd[i]);
     }
-    /* allocate memory */
+    
+    /* free memory */
     G_free_matrix(covar);
     G_free_vector(mu);
     G_free_ivector(inp_fd);
     G_free_matrix(eigmat);
     G_free_vector(eigval);
+
     exit(EXIT_SUCCESS);
 }
 
@@ -232,130 +237,142 @@
 }
 
 
-static int calc_mu(int *fds, double *mu, int bands)
+static int calc_mu(int *fds, double *mu, double *stddev, int bands)
 {
     int i;
+    int row, col;
     int rows = G_window_rows();
     int cols = G_window_cols();
-    void *rowbuf = NULL;
+    off_t count = 0;
+    double *sumsq;
+    DCELL **rowbuf = (DCELL **) G_malloc(bands * sizeof(DCELL *));
 
     for (i = 0; i < bands; i++) {
-	RASTER_MAP_TYPE maptype;
-	int row, col;
-	double sum = 0.;
+	rowbuf[i] = G_allocate_d_raster_buf();
+    }
 
-	maptype = G_get_raster_map_type(fds[i]);
+    if (stddev) {
+	G_message(_("Computing means and standard deviations..."));
+	sumsq = (double *)G_calloc(bands, sizeof(double));
+    }
+    else {
+	G_message(_("Computing means..."));
+	sumsq = NULL;
+    }
 
-	/* don't assume each image is of the same type */
-	if (rowbuf)
-	    G_free(rowbuf);
-	if ((rowbuf = G_allocate_raster_buf(maptype)) == NULL)
-	    G_fatal_error(_("Unable allocate memory for row buffer"));
+    for (row = 0; row < rows; row++) {
+	G_percent(row, rows, 2);
+	for (i = 0; i < bands; i++)
+	    G_get_d_raster_row(fds[i], rowbuf[i], row);
 
-	G_message(_("Computing means for band %d..."), i + 1);
-	for (row = 0; row < rows; row++) {
-	    void *ptr = rowbuf;
-
-	    G_percent(row, rows - 1, 2);
-
-	    if (G_get_raster_row(fds[i], rowbuf, row, maptype) < 0)
-		G_fatal_error(_("Unable to read raster map row %d"), row);
-
-	    for (col = 0; col < cols; col++) {
-		/* skip null cells */
-		if (G_is_null_value(ptr, maptype)) {
-		    ptr = G_incr_void_ptr(ptr, G_raster_size(maptype));
-		    continue;
-		}
-
-		sum += G_get_raster_value_d(ptr, maptype);
-		ptr = G_incr_void_ptr(ptr, G_raster_size(maptype));
+	for (col = 0; col < cols; col++) {
+	    /* ignore cells where any of the maps has null value */
+	    for (i = 0; i < bands; i++)
+		if (G_is_d_null_value(&rowbuf[i][col]))
+		    break;
+	    if (i != bands)
+		continue;
+	    count++;
+	    for (i = 0; i < bands; i++) {
+		mu[i] += rowbuf[i][col];
+		if (stddev)
+		    sumsq[i] += rowbuf[i][col] * rowbuf[i][col];
 	    }
 	}
+    }
+    G_percent(1, 1, 1);
+    
+    if (count < 2)
+	return 0;
 
-	mu[i] = sum / (double)(rows * cols);
+    for (i = 0; i < bands; i++) {
+	if (stddev)
+	    /* mu[i] is here still the sum */
+	    stddev[i] = sqrt((sumsq[i] - mu[i] * mu[i] / count) /
+	                     (count - 1));
+
+	mu[i] = mu[i] / count;
+	G_free(rowbuf[i]);
     }
 
     if (rowbuf)
 	G_free(rowbuf);
+    if (sumsq)
+	G_free(sumsq);
 
-    return 0;
+    return 1;
 }
 
 
-static int calc_covariance(int *fds, double **covar, double *mu, int bands)
+static int calc_covariance(int *fds, double **covar, double *mu, 
+                           double *stddev, int bands)
 {
-    int j, k;
+    int i, j;
+    int row, col;
     int rows = G_window_rows();
     int cols = G_window_cols();
-    int row, col;
+    off_t count = 0;
+    DCELL **rowbuf = (DCELL **) G_malloc(bands * sizeof(DCELL *));
 
-    for (j = 0; j < bands; j++) {
-	RASTER_MAP_TYPE maptype = G_get_raster_map_type(fds[j]);
-	void *rowbuf1 = NULL;
-	void *rowbuf2 = NULL;
+    for (i = 0; i < bands; i++) {
+	rowbuf[i] = G_allocate_d_raster_buf();
+    }
 
-	/* don't assume each image is of the same type */
-	if (rowbuf1)
-	    G_free(rowbuf1);
-	if ((rowbuf1 = G_allocate_raster_buf(maptype)) == NULL)
-	    G_fatal_error(_("Unable allocate memory for row buffer"));
+    G_message(_("Computing covariance matrix..."));
 
-	G_message(_("Computing row %d (of %d) of covariance matrix..."),
-		  j + 1, bands);
-	for (row = 0; row < rows; row++) {
-	    void *ptr1, *ptr2;
+    for (row = 0; row < rows; row++) {
+	G_percent(row, rows, 2);
+	for (i = 0; i < bands; i++)
+	    G_get_d_raster_row(fds[i], rowbuf[i], row);
 
-	    G_percent(row, rows - 1, 2);
+	for (col = 0; col < cols; col++) {
+	    /* ignore cells where any of the maps has null value */
+	    for (i = 0; i < bands; i++)
+		if (G_is_d_null_value(&rowbuf[i][col]))
+		    break;
+	    if (i != bands)
+		continue;
+	    count++;
+	    for (i = 0; i < bands; i++) {
+		DCELL dval1 = rowbuf[i][col];
 
-	    if (G_get_raster_row(fds[j], rowbuf1, row, maptype) < 0)
-		G_fatal_error(_("Unable to read raster map row %d"), row);
+		for (j = i; j < bands; j++) {
+		    DCELL dval2 = rowbuf[j][col];
 
-	    for (k = j; k < bands; k++) {
-		RASTER_MAP_TYPE maptype2 = G_get_raster_map_type(fds[k]);
-
-		/* don't assume each image is of the same type */
-		if (rowbuf2)
-		    G_free(rowbuf2);
-		if ((rowbuf2 = G_allocate_raster_buf(maptype2)) == NULL)
-		    G_fatal_error(_("Unable to allocate memory for row buffer"));
-
-		if (G_get_raster_row(fds[k], rowbuf2, row, maptype2) < 0)
-		    G_fatal_error(_("Unable to read raster map row %d"), row);
-
-		ptr1 = rowbuf1;
-		ptr2 = rowbuf2;
-
-		for (col = 0; col < cols; col++) {
-		    /* skip null cells */
-		    if (G_is_null_value(ptr1, maptype) ||
-			G_is_null_value(ptr2, maptype2)) {
-			ptr1 = G_incr_void_ptr(ptr1, G_raster_size(maptype));
-			ptr2 = G_incr_void_ptr(ptr2, G_raster_size(maptype2));
-			continue;
+		    if (stddev) {
+			covar[i][j] += (dval1 - mu[i]) * (dval2 - mu[j]) /
+			               (stddev[i] * stddev[j]);
 		    }
+		    else {
+			covar[i][j] += (dval1 - mu[i]) * (dval2 - mu[j]);
+		    }
 
-		    covar[j][k] +=
-			((double)G_get_raster_value_d(ptr1, maptype) -
-			 mu[j]) * ((double)G_get_raster_value_d(ptr2,
-						   maptype2) - mu[k]);
-
-		    ptr1 = G_incr_void_ptr(ptr1, G_raster_size(maptype));
-		    ptr2 = G_incr_void_ptr(ptr2, G_raster_size(maptype2));
 		}
-
-		covar[k][j] = covar[j][k];
 	    }
 	}
     }
+    G_percent(1, 1, 1);
+    
+    for (i = 0; i < bands; i++) {
+	for (j = i; j < bands; j++) {
+	    covar[i][j] = covar[i][j] / (count - 1);
+	    G_debug(3, "covar[%d][%d] = %f", i, j, covar[i][j]);
+	    if (j != i)
+		covar[j][i] = covar[i][j];
+	}
 
+	G_free(rowbuf[i]);
+    }
+    G_free(rowbuf);
+
     return 0;
 }
 
 
 static int
-write_pca(double **eigmat, int *inp_fd, char *out_basename,
-	  int bands, int scale, int scale_min, int scale_max)
+write_pca(double **eigmat, double *mu, double *stddev,
+          int *inp_fd, char *out_basename, int bands, 
+	  int scale, int scale_min, int scale_max)
 {
     int i, j;
     void *outbuf, *outptr;
@@ -365,19 +382,19 @@
     double new_range = 0.;
     int rows = G_window_rows();
     int cols = G_window_cols();
-    int cell_mapsiz = G_raster_size(CELL_TYPE);
-    int dcell_mapsiz = G_raster_size(DCELL_TYPE);
+    /* why CELL_TYPE when scaling output ? */
+    int outmap_type = (scale) ? CELL_TYPE : DCELL_TYPE;
+    int outcell_mapsiz = G_raster_size(outmap_type);
     DCELL *d_buf;
 
     /* 2 passes for rescale.  1 pass for no rescale */
     int PASSES = (scale) ? 2 : 1;
 
     /* temporary row storage */
-    d_buf = (DCELL *) G_malloc(cols * sizeof(double));
+    d_buf = G_allocate_d_raster_buf();
 
     /* allocate memory for output row buffer */
-    outbuf = (scale) ? G_allocate_raster_buf(CELL_TYPE) :
-	G_allocate_raster_buf(DCELL_TYPE);
+    outbuf = G_allocate_raster_buf(outmap_type);
 
     if (!outbuf)
 	G_fatal_error(_("Unable to allocate memory for raster row"));
@@ -392,17 +409,8 @@
 	G_message(_("Transforming <%s>..."), name);
 
 	/* open a new file for output */
-	if (scale)
-	    out_fd = G_open_cell_new(name);
-	else {
-	    out_fd = G_open_fp_cell_new(name);
-	    G_set_fp_type(DCELL_TYPE);
-	}
+	out_fd = G_open_raster_new(name, outmap_type);
 
-	if (out_fd < 0)
-	    G_fatal_error(_("Unable to create raster map <%s>"),
-			  G_fully_qualified_name(name, G_mapset()));
-
 	for (pass = 1; pass <= PASSES; pass++) {
 	    void *rowbuf = NULL;
 	    int row, col;
@@ -443,17 +451,13 @@
 		    /* add into the output cell eigmat[i][j] * corresp cell 
 		     * of j-th band for current j */
 		    for (col = 0; col < cols; col++) {
+			DCELL dval;
+
 			/* handle null cells */
 			if (G_is_null_value(rowptr, maptype)) {
-			    if (scale) {
-				G_set_null_value(outptr, 1, CELL_TYPE);
-				outptr = G_incr_void_ptr(outptr, cell_mapsiz);
-			    }
-			    else {
-				G_set_null_value(outptr, 1, DCELL_TYPE);
-				outptr =
-				    G_incr_void_ptr(outptr, dcell_mapsiz);
-			    }
+			    G_set_null_value(outptr, 1, outmap_type);
+			    outptr =
+				G_incr_void_ptr(outptr, outcell_mapsiz);
 
 			    rowptr =
 				G_incr_void_ptr(rowptr,
@@ -462,9 +466,11 @@
 			}
 
 			/* corresp. cell of j-th band */
-			d_buf[col] +=
-			    eigmat[i][j] * G_get_raster_value_d(rowptr,
-								maptype);
+			dval = G_get_raster_value_d(rowptr, maptype);
+			if (stddev)
+			    d_buf[col] += eigmat[i][j] * ((dval - mu[j]) / stddev[j]);
+			else
+			    d_buf[col] += eigmat[i][j] * (dval - mu[j]);
 
 			/* the cell entry is complete */
 			if (j == (bands - 1)) {
@@ -493,19 +499,18 @@
 						scale_min);
 
 				    G_set_raster_value_c(outptr, tmpcell,
-							 CELL_TYPE);
+							 outmap_type);
 				}
 			    }
 			    else {	/* (!scale) */
 
 				G_set_raster_value_d(outptr, d_buf[col],
-						     DCELL_TYPE);
+						     outmap_type);
 			    }
 			}
 
-			outptr = (scale) ?
-			    G_incr_void_ptr(outptr, cell_mapsiz) :
-			    G_incr_void_ptr(outptr, dcell_mapsiz);
+			outptr = 
+			    G_incr_void_ptr(outptr, outcell_mapsiz);
 
 			rowptr =
 			    G_incr_void_ptr(rowptr, G_raster_size(maptype));
@@ -513,14 +518,11 @@
 		}		/* for j = 0 to bands */
 
 		if (pass == PASSES) {
-		    if (scale)
-			G_put_raster_row(out_fd, outbuf, CELL_TYPE);
-		    else
-			G_put_raster_row(out_fd, outbuf, DCELL_TYPE);
+		    G_put_raster_row(out_fd, outbuf, outmap_type);
 		}
 	    }
 
-	    G_percent(row, rows, 2);
+	    G_percent(1, 1, 1);
 
 	    /* close output file */
 	    if (pass == PASSES)



More information about the grass-commit mailing list