[GRASS-SVN] r43812 - grass/trunk/raster/r.univar

svn_grass at osgeo.org svn_grass at osgeo.org
Thu Oct 7 06:28:04 EDT 2010


Author: mmetz
Date: 2010-10-07 10:28:04 +0000 (Thu, 07 Oct 2010)
New Revision: 43812

Modified:
   grass/trunk/raster/r.univar/globals.h
   grass/trunk/raster/r.univar/r.univar.html
   grass/trunk/raster/r.univar/r.univar_main.c
   grass/trunk/raster/r.univar/r3.univar.html
   grass/trunk/raster/r.univar/r3.univar_main.c
   grass/trunk/raster/r.univar/stats.c
Log:
add option for zonal statistics

Modified: grass/trunk/raster/r.univar/globals.h
===================================================================
--- grass/trunk/raster/r.univar/globals.h	2010-10-06 23:14:36 UTC (rev 43811)
+++ grass/trunk/raster/r.univar/globals.h	2010-10-07 10:28:04 UTC (rev 43812)
@@ -1,10 +1,11 @@
 /*
  *  Calculates univariate statistics from the non-null cells
  *
- *   Copyright (C) 2004-2007 by the GRASS Development Team
+ *   Copyright (C) 2004-2010 by the GRASS Development Team
  *   Author(s): Soeren Gebbert
  *              Based on r.univar from Hamish Bowman, University of Otago, New Zealand
  *              and Martin Landa
+ *              zonal loop by Markus Metz
  *
  *      This program is free software under the GNU General Public
  *      License (>=v2). Read the file COPYING that comes with GRASS
@@ -39,23 +40,35 @@
     FCELL *fcell_array;
     CELL *cell_array;
     int map_type;
+    void *nextp;
+    int n_alloc;
+    int first;
 } univar_stat;
 
+typedef struct
+{
+    CELL min, max, n_zones;
+    struct Categories cats;
+    char *sep;
+} zone_type;
+
 /* command line options are the same for raster and raster3d maps */
 typedef struct
 {
-    struct Option *inputfile, *percentile;
-    struct Flag *shell_style, *extended;
+    struct Option *inputfile, *zonefile, *percentile, *output_file, *separator;
+    struct Flag *shell_style, *extended, *table;
 } param_type;
 
 extern param_type param;
+extern zone_type zone_info;
 
 /* fn prototypes */
 void heapsort_double(double *data, int n);
 void heapsort_float(float *data, int n);
 void heapsort_int(int *data, int n);
 int print_stats(univar_stat * stats);
-univar_stat *create_univar_stat_struct(int map_type, int size, int n_perc);
+int print_stats_table(univar_stat * stats);
+univar_stat *create_univar_stat_struct(int map_type, int n_perc);
 void free_univar_stat_struct(univar_stat * stats);
 
 #endif

Modified: grass/trunk/raster/r.univar/r.univar.html
===================================================================
--- grass/trunk/raster/r.univar/r.univar.html	2010-10-06 23:14:36 UTC (rev 43811)
+++ grass/trunk/raster/r.univar/r.univar.html	2010-10-07 10:28:04 UTC (rev 43812)
@@ -3,11 +3,16 @@
 <em>r.univar</em> calculates the univariate statistics of a raster map.
 This includes the number of cells counted, minimum and maximum cell values,
 range, arithmetic mean, population variance, standard deviation, and 
-coefficient of variation.
+coefficient of variation. Statistics are calculated separately for every
+category/zone found in the <b>zones</b> input map if given.
 If the <b>-e</b> extended statistics flag is given the 1st quartile, median,
 3rd quartile, and given <b>percentile</b> are calculated.
 If the <b>-g</b> flag is given the results are presented in a format suitable
 for use in a shell script.
+If the <b>-t</b> flag is given the results are presented in tabular format
+with "|" as field separator. The table can immediately be converted to a
+vector attribute table which can then be linked to a vector, e.g. the vector
+that was rasterized to create the <b>zones</b> input raster.
 
 <h2>NOTES</h2>
 
@@ -20,22 +25,91 @@
 region is too large the module should exit gracefully with a memory allocation
 error. Basic statistics can be calculated using any size input region.
 <p>
-The <em>r.quantile</em> module will be significantly more efficient for
-calculating percentiles with large maps.
+Without a <b>zones</b> input raster, the <em>r.quantile</em> module will
+be significantly more efficient for calculating percentiles with large maps.
 
+<h2>EXAMPLE</h2>
 
+Calculate the raster statistics for zones within a vector map coverage
+and upload the results for mean, min and max back to the vector map.
+
+<div class="code"><pre>
+#### set the raster region to match the map
+g.region vect=fields res=10 -ap
+
+#### create rasterized version of vector map
+v.to.rast in=fields out=fields.10m use=cat type=area labelcolumn=label
+r.colors fields.10m color=random
+
+#### perform analysis
+r.univar -t map=elevation.10m zones=fields.10m | \
+  cut -f1,5,6,8 -d'|' > fields_stats.txt
+
+
+#### populate vector DB with stats
+
+# create working copy of vector map
+g.copy vect=fields,fields_stats
+
+# create new attribute columns to hold output
+v.db.addcol map=fields_stats \
+  columns='mean_elev DOUBLE PRECISION, min_elev DOUBLE PRECISION, max_elev DOUBLE PRECISION'
+
+# create SQL command file, and execute it
+sed -e '1d' fields_stats.txt | awk -F'|' \
+  '{print "UPDATE fields_stats SET min_elev = "$2", max_elev = "$3", \
+  mean_elev = "$4" WHERE cat = "$1";"}' \
+   > fields_stats_sqlcmd.txt
+
+db.execute input=fields_stats_sqlcmd.txt
+<!--
+
+###### alternate method with db.in.ogr:  (needs work) ######
+
+#### convert text file table to a database table
+# not safe for commas in the label
+tr '|' ',' < fields_stats.txt > fields_stats.csv
+echo '"Integer","String","Real","Real","Real"' > fields_stats.csvt
+
+# import table
+db.in.ogr dsn=fields_stats.csv output=fields_data
+
+# view table
+db.select fields_data
+
+# remove temporary files
+rm fields_stats.csv fields_stats.csvt fields_stats.txt
+
+
+#### populate vector DB with stats
+
+# create working copy of vector map
+g.copy vect=fields,fields_stats
+
+# create new attribute columns to hold output
+v.db.addcol map=fields_stats \
+  columns='mean_elev DOUBLE PRECISION, min_elev DOUBLE PRECISION, max_elev DOUBLE PRECISION'
+
+# perform DB step  (broken)
+## how to automatically collate by key column, ie copy between tables?
+## SELECT INTO? JOIN?
+echo "INSERT INTO fields_stats (mean_elev,min_elev,max_elev) SELECT mean,min,max FROM fields_data" | db.execute
+-->
+
+#### view completed table
+v.db.select fields_stats
+</pre></div>
+
+
 <h2>TODO</h2>
 
 <i>mode, skewness, kurtosis</i>
 
-
-
 <h2>SEE ALSO</h2>
 
 <em>
 <a href="g.region.html">g.region</a><br>
 <a href="r3.univar.html">r3.univar</a><br>
-<a href="r.univar.sh.html">r.univar.sh</a><br>
 <a href="r.average.html">r.average</a><br>
 <a href="r.median.html">r.median</a><br>
 <a href="r.mode.html">r.mode</a><br>
@@ -43,16 +117,17 @@
 <a href="r.sum.html">r.sum</a><br>
 <a href="r.series.html">r.series</a><br>
 <a href="r.stats.html">r.stats</a><br>
+<a href="v.rast.stats.html">v.rast.stats</a><br>
 <a href="r.statistics.html">r.statistics</a><br>
 <a href="v.univar.html">v.univar</a><br>
-<a href="v.univar.sh.html">v.univar.sh</a><br>
 </em>
 
 
 <h2>AUTHORS</h2>
 
 Hamish Bowman, Otago University, New Zealand<br>
-Extended statistics by Martin Landa
+Extended statistics by Martin Landa<br>
+Zonal loop by Markus Metz
 
 <p>
 <i>Last changed: $Date$</i>

Modified: grass/trunk/raster/r.univar/r.univar_main.c
===================================================================
--- grass/trunk/raster/r.univar/r.univar_main.c	2010-10-06 23:14:36 UTC (rev 43811)
+++ grass/trunk/raster/r.univar/r.univar_main.c	2010-10-07 10:28:04 UTC (rev 43812)
@@ -19,14 +19,26 @@
 #include "globals.h"
 
 param_type param;
+zone_type zone_info;
 
 /* ************************************************************************* */
 /* Set up the arguments we are expecting ********************************** */
 /* ************************************************************************* */
-void set_params(void)
+void set_params()
 {
     param.inputfile = G_define_standard_option(G_OPT_R_MAPS);
 
+    param.zonefile = G_define_standard_option(G_OPT_R_MAP);
+    param.zonefile->key = "zones";
+    param.zonefile->required = NO;
+    param.zonefile->description =
+	_("Raster map used for zoning, must be of type CELL");
+
+    param.output_file = G_define_standard_option(G_OPT_F_OUTPUT);
+    param.output_file->required = NO;
+    param.output_file->description =
+	_("Name for output file (if omitted or \"-\" output to stdout)");
+
     param.percentile = G_define_option();
     param.percentile->key = "percentile";
     param.percentile->type = TYPE_DOUBLE;
@@ -37,6 +49,9 @@
     param.percentile->description =
 	_("Percentile to calculate (requires extended statistics flag)");
 
+    param.separator = G_define_standard_option(G_OPT_F_SEP);
+    param.separator->description = _("Special characters: space, comma, tab");
+
     param.shell_style = G_define_flag();
     param.shell_style->key = 'g';
     param.shell_style->description =
@@ -46,12 +61,16 @@
     param.extended->key = 'e';
     param.extended->description = _("Calculate extended statistics");
 
+    param.table = G_define_flag();
+    param.table->key = 't';
+    param.table->description = _("Table output format instead of standard output format");
+
     return;
 }
 
 static int open_raster(const char *infile);
-static univar_stat *univar_stat_with_percentiles(int map_type, int size);
-static void process_raster(univar_stat * stats, int fd,
+static univar_stat *univar_stat_with_percentiles(int map_type);
+static void process_raster(univar_stat * stats, int fd, int fdz,
 			   const struct Cell_head *region);
 
 /* *************************************************************** */
@@ -65,8 +84,11 @@
     struct Cell_head region;
     struct GModule *module;
     univar_stat *stats;
+    char **p, *z;
+    int fd, fdz, cell_type, min, max;
+    struct Range zone_range;
+    const char *mapset, *name;
 
-
     G_gisinit(argv[0]);
 
     module = G_define_module();
@@ -81,56 +103,101 @@
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
+    name = param.output_file->answer;
+    if (name != NULL && strcmp(name, "-") != 0) {
+	if (NULL == freopen(name, "w", stdout)) {
+	    G_fatal_error(_("Unable to open file <%s> for writing"), name);
+	}
+    }
+
     G_get_window(&region);
     rows = region.rows;
     cols = region.cols;
 
-    /* count the rasters given */
-    {
-	const char **p;
+    /* table field separator */
+    zone_info.sep = param.separator->answer;
+    if (strcmp(zone_info.sep, "\\t") == 0)
+	zone_info.sep = "\t";
+    if (strcmp(zone_info.sep, "tab") == 0)
+	zone_info.sep = "\t";
+    if (strcmp(zone_info.sep, "space") == 0)
+	zone_info.sep = " ";
+    if (strcmp(zone_info.sep, "comma") == 0)
+	zone_info.sep = ",";
 
-	for (p = (const char **)param.inputfile->answers, rasters = 0;
-	     *p; p++, rasters++) ;
+    zone_info.min = 0.0 / 0.0;	/*set to nan as default */
+    zone_info.max = 0.0 / 0.0;	/*set to nan as default */
+    zone_info.n_zones = 0;
+
+    fdz = -1;
+    
+    /* open zoning raster */
+    if ((z = param.zonefile->answer)) {
+	mapset = G_find_raster2(z, "");
+
+	fdz = open_raster(z);
+	
+	cell_type = Rast_get_map_type(fdz);
+	if (cell_type != CELL_TYPE)
+	    G_fatal_error("Zoning raster must be of type CELL");
+
+	if (Rast_read_range(z, mapset, &zone_range) == -1)
+	    G_fatal_error("Can not read range for zoning raster");
+	Rast_get_range_min_max(&zone_range, &min, &max);
+	if (Rast_read_cats(z, mapset, &(zone_info.cats)))
+	    G_warning("no category support for zoning raster");
+
+	zone_info.min = min;
+	zone_info.max = max;
+	zone_info.n_zones = max - min + 1;
     }
 
-    /* process them all */
-    {
-	size_t cells = rasters * cols * rows;
-	int map_type = param.extended->answer ? -2 : -1;
-	char **p;
+    /* count the input rasters given */
+    for (p = (char **)param.inputfile->answers, rasters = 0;
+	 *p; p++, rasters++) ;
 
-	stats = ((map_type == -1)
-		 ? create_univar_stat_struct(-1, cells, 0)
-		 : 0);
+    /* process all input rasters */
+    int map_type = param.extended->answer ? -2 : -1;
 
-	for (p = param.inputfile->answers; *p; p++) {
-	    int fd = open_raster(*p);
+    stats = ((map_type == -1)
+	     ? create_univar_stat_struct(-1, 0)
+	     : 0);
 
-	    if (map_type != -1) {
-		/* NB: map_type must match when doing extended stats */
-		int this_type = Rast_get_map_type(fd);
+    for (p = param.inputfile->answers; *p; p++) {
+	fd = open_raster(*p);
 
-		assert(this_type > -1);
-		if (map_type < -1) {
-		    assert(stats == 0);
-		    map_type = this_type;
-		    stats = univar_stat_with_percentiles(map_type, cells);
-		}
-		else if (this_type != map_type) {
-		    G_fatal_error(_("Raster <%s> type mismatch"), *p);
-		}
+	if (map_type != -1) {
+	    /* NB: map_type must match when doing extended stats */
+	    int this_type = Rast_get_map_type(fd);
+
+	    assert(this_type > -1);
+	    if (map_type < -1) {
+		/* extended stats */
+		assert(stats == 0);
+		map_type = this_type;
+		stats = univar_stat_with_percentiles(map_type);
 	    }
+	    else if (this_type != map_type) {
+		G_fatal_error(_("Raster <%s> type mismatch"), *p);
+	    }
+	}
 
-	    process_raster(stats, fd, &region);
-	}
+	process_raster(stats, fd, fdz, &region);
+
+	/* close input raster */
+	Rast_close(fd);
     }
 
-    if (!(param.shell_style->answer))
-	G_percent(rows, rows, 2);	/* finish it off */
+    /* close zoning raster */
+    if (z)
+	Rast_close(fdz);
 
     /* create the output */
-    print_stats(stats);
-
+    if (param.table->answer)
+	print_stats_table(stats);
+    else
+	print_stats(stats);
+	
     /* release memory */
     free_univar_stat_struct(stats);
 
@@ -139,20 +206,36 @@
 
 static int open_raster(const char *infile)
 {
-    return Rast_open_old(infile, "");
+    const char *mapset;
+    int fd;
+
+    mapset = G_find_raster2(infile, "");
+    if (mapset == NULL) {
+	G_fatal_error(_("Raster map <%s> not found"), infile);
+    }
+
+    fd = Rast_open_old(infile, mapset);
+
+    return fd;
 }
 
-static univar_stat *univar_stat_with_percentiles(int map_type, int size)
+static univar_stat *univar_stat_with_percentiles(int map_type)
 {
     univar_stat *stats;
-    int i;
+    int i, j;
+    int n_zones = zone_info.n_zones;
 
+    if (n_zones == 0)
+	n_zones = 1;
+
     i = 0;
     while (param.percentile->answers[i])
 	i++;
-    stats = create_univar_stat_struct(map_type, size, i);
-    for (i = 0; i < stats->n_perc; i++) {
-	sscanf(param.percentile->answers[i], "%lf", &stats->perc[i]);
+    stats = create_univar_stat_struct(map_type, i);
+    for (i = 0; i < n_zones; i++) {
+	for (j = 0; j < stats[i].n_perc; j++) {
+	    sscanf(param.percentile->answers[j], "%lf", &(stats[i].perc[j]));
+	}
     }
 
     /* . */
@@ -160,72 +243,123 @@
 }
 
 static void
-process_raster(univar_stat * stats, int fd, const struct Cell_head *region)
+process_raster(univar_stat * stats, int fd, int fdz, const struct Cell_head *region)
 {
-    /* use Rast_window_rows(), Rast_window_cols() here? */
+    /* use G_window_rows(), G_window_cols() here? */
     const int rows = region->rows;
     const int cols = region->cols;
-    int first = (stats->n < 1);
 
     const RASTER_MAP_TYPE map_type = Rast_get_map_type(fd);
-    void *nextp
-	= ((!param.extended->answer) ? 0
-	   : (map_type == DCELL_TYPE) ? (void *)stats->dcell_array
-	   : (map_type == FCELL_TYPE) ? (void *)stats->fcell_array
-	   : (void *)stats->cell_array);
     const size_t value_sz = Rast_cell_size(map_type);
     unsigned int row;
     void *raster_row;
+    CELL *zoneraster_row;
+    int n_zones = zone_info.n_zones;
+    
+    raster_row = Rast_allocate_buf(map_type);
+    if (n_zones)
+	zoneraster_row = Rast_allocate_c_buf();
 
-    raster_row = G_calloc(cols, value_sz);
-
     for (row = 0; row < rows; row++) {
 	void *ptr;
+	CELL *zptr;
 	unsigned int col;
 
 	Rast_get_row(fd, raster_row, row, map_type);
+	if (n_zones) {
+	    Rast_get_c_row(fdz, zoneraster_row, row);
+	    zptr = zoneraster_row;
+	}
 
 	ptr = raster_row;
 
 	for (col = 0; col < cols; col++) {
+	    double val;
+	    int zone = 0;
 
+	    if (n_zones) {
+		/* skip NULL cells in zone map */
+		if (Rast_is_c_null_value(zptr)) {
+		    ptr = G_incr_void_ptr(ptr, value_sz);
+		    zptr++;
+		    continue;
+		}
+		zone = *zptr - zone_info.min;
+	    }
+
+	    /* count all including NULL cells in input map */
+	    stats[zone].size++;
+	    
+	    /* can't do stats with NULL cells in input map */
 	    if (Rast_is_null_value(ptr, map_type)) {
 		ptr = G_incr_void_ptr(ptr, value_sz);
+		if (n_zones)
+		    zptr++;
 		continue;
 	    }
 
-	    if (nextp) {
+	    if (param.extended->answer) {
+		/* check allocated memory */
+		if (stats[zone].n >= stats[zone].n_alloc) {
+		    stats[zone].n_alloc += 1000;
+		    size_t msize;
+		    switch (map_type) {
+			case DCELL_TYPE:
+			    msize = stats[zone].n_alloc * sizeof(DCELL);
+			    stats[zone].dcell_array =
+				(DCELL *)G_realloc((void *)stats[zone].dcell_array, msize);
+			    stats[zone].nextp = (void *)&(stats[zone].dcell_array[stats[zone].n]);
+			    break;
+			case FCELL_TYPE:
+			    msize = stats[zone].n_alloc * sizeof(FCELL);
+			    stats[zone].fcell_array =
+				(FCELL *)G_realloc((void *)stats[zone].fcell_array, msize);
+			    stats[zone].nextp = (void *)&(stats[zone].fcell_array[stats[zone].n]);
+			    break;
+			case CELL_TYPE:
+			    msize = stats[zone].n_alloc * sizeof(CELL);
+			    stats[zone].cell_array =
+				(CELL *)G_realloc((void *)stats[zone].cell_array, msize);
+			    stats[zone].nextp = (void *)&(stats[zone].cell_array[stats[zone].n]);
+			    break;
+			default:
+			    break;
+		    }
+		}
 		/* put the value into stats->XXXcell_array */
-		memcpy(nextp, ptr, value_sz);
-		nextp = G_incr_void_ptr(nextp, value_sz);
+		memcpy(stats[zone].nextp, ptr, value_sz);
+		stats[zone].nextp = G_incr_void_ptr(stats[zone].nextp, value_sz);
 	    }
 
-	    {
-		double val = ((map_type == DCELL_TYPE) ? *((DCELL *) ptr)
-			      : (map_type == FCELL_TYPE) ? *((FCELL *) ptr)
-			      : *((CELL *) ptr));
+	    val = ((map_type == DCELL_TYPE) ? *((DCELL *) ptr)
+			  : (map_type == FCELL_TYPE) ? *((FCELL *) ptr)
+			  : *((CELL *) ptr));
 
-		stats->sum += val;
-		stats->sumsq += val * val;
-		stats->sum_abs += fabs(val);
+	    stats[zone].sum += val;
+	    stats[zone].sumsq += val * val;
+	    stats[zone].sum_abs += fabs(val);
 
-		if (first) {
-		    stats->max = val;
-		    stats->min = val;
-		    first = FALSE;
-		}
-		else {
-		    if (val > stats->max)
-			stats->max = val;
-		    if (val < stats->min)
-			stats->min = val;
-		}
+	    if (stats[zone].first) {
+		stats[zone].max = val;
+		stats[zone].min = val;
+		stats[zone].first = FALSE;
 	    }
+	    else {
+		if (val > stats[zone].max)
+		    stats[zone].max = val;
+		if (val < stats[zone].min)
+		    stats[zone].min = val;
+	    }
 
 	    ptr = G_incr_void_ptr(ptr, value_sz);
-	    stats->n++;
+	    if (n_zones)
+		zptr++;
+	    stats[zone].n++;
 	}
 	if (!(param.shell_style->answer))
 	    G_percent(row, rows, 2);
     }
+    if (!(param.shell_style->answer))
+	G_percent(rows, rows, 2);	/* finish it off */
+
 }

Modified: grass/trunk/raster/r.univar/r3.univar.html
===================================================================
--- grass/trunk/raster/r.univar/r3.univar.html	2010-10-06 23:14:36 UTC (rev 43811)
+++ grass/trunk/raster/r.univar/r3.univar.html	2010-10-07 10:28:04 UTC (rev 43812)
@@ -3,11 +3,16 @@
 <em>r3.univar</em> calculates the univariate statistics for raster3d maps.
 This includes the number of cells counted, minimum and maximum cell values,
 range, arithmetic mean, population variance, standard deviation, and 
-coefficient of variation.
+coefficient of variation. Statistics are calculated separately for every
+category/zone found in the <b>zones</b> input map if given.
 If the <b>-e</b> extended statistics flag is given the 1st quartile, median,
 3rd quartile, and given <b>percentile</b> are calculated.
 If the <b>-g</b> flag is given the results are presented in a format suitable
 for use in a shell script.
+If the <b>-t</b> flag is given the results are presented in tabular format
+with "|" as field separator. The table can immediately be converted to a
+vector attribute table which can then be linked to a vector, e.g. the vector
+that was rasterized to create the <b>zones</b> input raster.
 
 <h2>NOTES</h2>
 
@@ -19,6 +24,7 @@
 extended statistics flag is used with a very large region setting. If the
 region is too large the module should exit gracefully with a memory allocation
 error. Basic statistics can be calculated using any size input region.
+
 <!-- no rast3D support?
 <p>
 The <em>r.quantile</em> module will be significantly more efficient for
@@ -36,7 +42,6 @@
 <em>
 <a href="g.region.html">g.region</a><br>
 <a href="r.univar.html">r.univar</a><br>
-<a href="r.univar.sh.html">r.univar.sh</a><br>
 <a href="r.average.html">r.average</a><br>
 <a href="r.median.html">r.median</a><br>
 <a href="r.mode.html">r.mode</a><br>
@@ -46,7 +51,6 @@
 <a href="r.stats.html">r.stats</a><br>
 <a href="r.statistics.html">r.statistics</a><br>
 <a href="v.univar.html">v.univar</a><br>
-<a href="v.univar.sh.html">v.univar.sh</a><br>
 </em>
 
 
@@ -55,7 +59,8 @@
 Soeren Gebbert<br>
 Code is based on r.univar from<br>
 Hamish Bowman, Otago University, New Zealand<br>
-and Martin Landa
+and Martin Landa<br>
+Zonal loop by Markus Metz
 
 
 <p>

Modified: grass/trunk/raster/r.univar/r3.univar_main.c
===================================================================
--- grass/trunk/raster/r.univar/r3.univar_main.c	2010-10-06 23:14:36 UTC (rev 43811)
+++ grass/trunk/raster/r.univar/r3.univar_main.c	2010-10-07 10:28:04 UTC (rev 43812)
@@ -15,17 +15,31 @@
  *
  */
 
+#include <string.h>
 #include "globals.h"
+#include "grass/G3d.h"
 
 param_type param;
+zone_type zone_info;
 
 /* ************************************************************************* */
 /* Set up the arguments we are expecting ********************************** */
 /* ************************************************************************* */
-void set_params(void)
+void set_params()
 {
     param.inputfile = G_define_standard_option(G_OPT_R3_INPUT);
 
+    param.zonefile = G_define_standard_option(G_OPT_R3_INPUT);
+    param.zonefile->key = "zones";
+    param.zonefile->required = NO;
+    param.zonefile->description =
+	_("3D Raster map used for zoning, must be of type CELL");
+
+    param.output_file = G_define_standard_option(G_OPT_F_OUTPUT);
+    param.output_file->required = NO;
+    param.output_file->description =
+	_("Name for output file (if omitted or \"-\" output to stdout)");
+
     param.percentile = G_define_option();
     param.percentile->key = "percentile";
     param.percentile->type = TYPE_DOUBLE;
@@ -36,6 +50,9 @@
     param.percentile->description =
 	_("Percentile to calculate (requires extended statistics flag)");
 
+    param.separator = G_define_standard_option(G_OPT_F_SEP);
+    param.separator->description = _("Special characters: space, comma, tab");
+
     param.shell_style = G_define_flag();
     param.shell_style->key = 'g';
     param.shell_style->description =
@@ -45,6 +62,10 @@
     param.extended->key = 'e';
     param.extended->description = _("Calculate extended statistics");
 
+    param.table = G_define_flag();
+    param.table->key = 't';
+    param.table->description = _("Table output format instead of standard output format");
+
     return;
 }
 
@@ -59,15 +80,19 @@
     double val_d;		/* for misc use */
     int first = TRUE;		/* min/max init flag */
 
-    int map_type;
+    int map_type, zmap_type;
     univar_stat *stats;
 
-    char *infile;
-    void *map;
+    char *infile, *zonemap;
+    void *map, *zmap = NULL;
     G3D_Region region;
     unsigned int i;
     unsigned int rows, cols, depths;
     unsigned int x, y, z;
+    double dmin, dmax;
+    int zone, use_zone = 0;
+    char *mapset, *name;
+    struct FPRange zone_range;
 
     struct GModule *module;
 
@@ -85,7 +110,6 @@
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
-
     /*Set the defaults */
     G3d_initDefaults();
 
@@ -96,13 +120,77 @@
     rows = region.rows;
     depths = region.depths;
 
+    name = param.output_file->answer;
+    if (name != NULL && strcmp(name, "-") != 0) {
+	if (NULL == freopen(name, "w", stdout)) {
+	    G_fatal_error(_("Unable to open file <%s> for writing"), name);
+	}
+    }
 
+    /* table field separator */
+    zone_info.sep = param.separator->answer;
+    if (strcmp(zone_info.sep, "\\t") == 0)
+	zone_info.sep = "\t";
+    if (strcmp(zone_info.sep, "tab") == 0)
+	zone_info.sep = "\t";
+    if (strcmp(zone_info.sep, "space") == 0)
+	zone_info.sep = " ";
+    if (strcmp(zone_info.sep, "comma") == 0)
+	zone_info.sep = ",";
+
+    dmin = 0.0 / 0.0;	/*set to nan as default */
+    dmax = 0.0 / 0.0;	/*set to nan as default */
+    zone_info.min = 0.0 / 0.0;	/*set to nan as default */
+    zone_info.max = 0.0 / 0.0;	/*set to nan as default */
+    zone_info.n_zones = 0;
+
+    /* open 3D zoning raster with default region */
+    if ((zonemap = param.zonefile->answer) != NULL) {
+	if (NULL == (mapset = G_find_grid3(zonemap, "")))
+	    G3d_fatalError(_("Requested g3d map <%s> not found"), zonemap);
+
+	zmap =
+	    G3d_openCellOld(zonemap, G_find_grid3(zonemap, ""), &region,
+			    G3D_TILE_SAME_AS_FILE, G3D_USE_CACHE_DEFAULT);
+
+	if (zmap == NULL)
+	    G3d_fatalError(_("Error opening g3d map <%s>"), zonemap);
+
+	if (G3d_tileTypeMap(zmap) != CELL_TYPE)
+	    G_fatal_error("Zoning raster must be of type CELL");
+
+	zmap_type = G3d_tileTypeMap(zmap);
+	
+	if (zmap_type != CELL_TYPE)
+	    G_fatal_error("Zoning raster must be of type CELL");
+
+	if (G3d_readRange(zonemap, mapset, &zone_range) == -1)
+	    G_fatal_error("Can not read range for zoning raster");
+	G3d_range_min_max(zmap, &dmin, &dmax);
+	if (Rast_read_cats(zonemap, mapset, &(zone_info.cats)))
+	    G_warning("no category support for zoning raster");
+
+	/* properly round dmin and dmax */
+	if (dmin < 0)
+	    zone_info.min = dmin - 0.5;
+	else
+	    zone_info.min = dmin + 0.5;
+	if (dmax < 0)
+	    zone_info.max = dmax - 0.5;
+	else
+	    zone_info.max = dmax + 0.5;
+	    
+	zone_info.n_zones = zone_info.max - zone_info.min + 1;
+
+	use_zone = 1;
+    }
+
+    /*Open 3D input raster with default region */
     infile = param.inputfile->answer;
 
     if (NULL == G_find_grid3(infile, ""))
 	G3d_fatalError(_("Requested g3d map <%s> not found"), infile);
 
-    /*Open all maps with default region */
     map =
 	G3d_openCellOld(infile, G_find_grid3(infile, ""), &region,
 			G3D_TILE_SAME_AS_FILE, G3D_USE_CACHE_DEFAULT);
@@ -115,71 +203,102 @@
     i = 0;
     while (param.percentile->answers[i])
 	i++;
-    stats = create_univar_stat_struct(map_type, cols * rows * depths, i);
-    for (i = 0; i < stats->n_perc; i++) {
-	sscanf(param.percentile->answers[i], "%lf", &stats->perc[i]);
+    stats = create_univar_stat_struct(map_type, i);
+    for (i = 0; i < zone_info.n_zones; i++) {
+	unsigned int j;
+	for (j = 0; j < stats[i].n_perc; j++) {
+	    sscanf(param.percentile->answers[j], "%lf", &(stats[i].perc[j]));
+	}
     }
 
-    stats->n = 0;
     for (z = 0; z < depths; z++) {	/*From the bottom to the top */
 	if (!(param.shell_style->answer))
 	    G_percent(z, depths - 1, 10);
 	for (y = 0; y < rows; y++) {
 	    for (x = 0; x < cols; x++) {
+		zone = 0;
+		if (zone_info.n_zones)
+		    G3d_getValue(zmap, x, y, z, &zone, CELL_TYPE);
 		if (map_type == FCELL_TYPE) {
 		    G3d_getValue(map, x, y, z, &val_f, map_type);
 		    if (!G3d_isNullValueNum(&val_f, map_type)) {
-			if (param.extended->answer)
-			    stats->fcell_array[stats->n] = val_f;
+			if (param.extended->answer) {
+			    if (stats[zone].n >= stats[zone].n_alloc) {
+				size_t msize;
+				stats[zone].n_alloc += 1000;
+				msize = stats[zone].n_alloc * sizeof(FCELL);
+				stats[zone].fcell_array =
+				    (FCELL *)G_realloc((void *)stats[zone].fcell_array, msize);
+			    }
 
-			stats->sum += val_f;
-			stats->sumsq += (val_f * val_f);
-			stats->sum_abs += fabs(val_f);
+			    stats[zone].fcell_array[stats[zone].n] = val_f;
+			}
 
-			if (first) {
-			    stats->max = val_f;
-			    stats->min = val_f;
-			    first = FALSE;
+			stats[zone].sum += val_f;
+			stats[zone].sumsq += (val_f * val_f);
+			stats[zone].sum_abs += fabs(val_f);
+
+			if (stats[zone].first) {
+			    stats[zone].max = val_f;
+			    stats[zone].min = val_f;
+			    stats[zone].first = FALSE;
 			}
 			else {
-			    if (val_f > stats->max)
-				stats->max = val_f;
-			    if (val_f < stats->min)
-				stats->min = val_f;
+			    if (val_f > stats[zone].max)
+				stats[zone].max = val_f;
+			    if (val_f < stats[zone].min)
+				stats[zone].min = val_f;
 			}
-			stats->n++;
+			stats[zone].n++;
 		    }
 		}
 		else if (map_type == DCELL_TYPE) {
 		    G3d_getValue(map, x, y, z, &val_d, map_type);
 		    if (!G3d_isNullValueNum(&val_d, map_type)) {
-			if (param.extended->answer)
-			    stats->dcell_array[stats->n] = val_d;
+			if (param.extended->answer) {
+			    if (stats[zone].n >= stats[zone].n_alloc) {
+				size_t msize;
+				stats[zone].n_alloc += 1000;
+				msize = stats[zone].n_alloc * sizeof(DCELL);
+				stats[zone].dcell_array =
+				    (DCELL *)G_realloc((void *)stats[zone].dcell_array, msize);
+				}
 
-			stats->sum += val_d;
-			stats->sumsq += val_d * val_d;
-			stats->sum_abs += fabs(val_d);
+			    stats[zone].dcell_array[stats[zone].n] = val_d;
+			}
 
+			stats[zone].sum += val_d;
+			stats[zone].sumsq += val_d * val_d;
+			stats[zone].sum_abs += fabs(val_d);
+
 			if (first) {
-			    stats->max = val_d;
-			    stats->min = val_d;
-			    first = FALSE;
+			    stats[zone].max = val_d;
+			    stats[zone].min = val_d;
+			    stats[zone].first = FALSE;
 			}
 			else {
-			    if (val_d > stats->max)
-				stats->max = val_d;
-			    if (val_d < stats->min)
-				stats->min = val_d;
+			    if (val_d > stats[zone].max)
+				stats[zone].max = val_d;
+			    if (val_d < stats[zone].min)
+				stats[zone].min = val_d;
 			}
-			stats->n++;
+			stats[zone].n++;
 		    }
 		}
 	    }
 	}
     }
 
+    /* close maps */
+    G3d_closeCell(map);
+    if (zone_info.n_zones)
+	G3d_closeCell(zmap);
+
     /* create the output */
-    print_stats(stats);
+    if (param.table->answer)
+	print_stats_table(stats);
+    else
+	print_stats(stats);
 
     /* release memory */
     free_univar_stat_struct(stats);

Modified: grass/trunk/raster/r.univar/stats.c
===================================================================
--- grass/trunk/raster/r.univar/stats.c	2010-10-06 23:14:36 UTC (rev 43811)
+++ grass/trunk/raster/r.univar/stats.c	2010-10-07 10:28:04 UTC (rev 43812)
@@ -16,39 +16,55 @@
 /* *************************************************************** */
 /* **** univar_stat constructor ********************************** */
 /* *************************************************************** */
-univar_stat *create_univar_stat_struct(int map_type, int size, int n_perc)
+univar_stat *create_univar_stat_struct(int map_type, int n_perc)
 {
     univar_stat *stats;
+    int i;
+    int n_zones = zone_info.n_zones;
 
-    stats = (univar_stat *) G_calloc(1, sizeof(univar_stat));
+    if (n_zones == 0)
+	n_zones = 1;
 
-    stats->sum = 0.0;
-    stats->sumsq = 0.0;
-    stats->min = 0.0 / 0.0;	/*set to nan as default */
-    stats->max = 0.0 / 0.0;	/*set to nan as default */
-    stats->n_perc = n_perc;
-    if (n_perc > 0)
-	stats->perc = (double *)G_malloc(n_perc * sizeof(double));
-    else
-	stats->perc = NULL;
-    stats->sum_abs = 0.0;
-    stats->n = 0;
-    stats->size = size;
-    stats->dcell_array = NULL;
-    stats->fcell_array = NULL;
-    stats->cell_array = NULL;
-    stats->map_type = map_type;
+    stats = (univar_stat *) G_calloc(n_zones, sizeof(univar_stat));
 
-    /* alloacte memory for extended computation */
-    if (param.extended->answer) {
-	if (map_type == DCELL_TYPE)
-	    stats->dcell_array =
-		(DCELL *) G_calloc(stats->size, sizeof(DCELL));
-	if (map_type == FCELL_TYPE)
-	    stats->fcell_array =
-		(FCELL *) G_calloc(stats->size, sizeof(FCELL));
-	if (map_type == CELL_TYPE)
-	    stats->cell_array = (CELL *) G_calloc(stats->size, sizeof(CELL));
+    for (i = 0; i < n_zones; i++) {
+	stats[i].sum = 0.0;
+	stats[i].sumsq = 0.0;
+	stats[i].min = 0.0 / 0.0;	/* set to nan as default */
+	stats[i].max = 0.0 / 0.0;	/* set to nan as default */
+	stats[i].n_perc = n_perc;
+	if (n_perc > 0)
+	    stats[i].perc = (double *)G_malloc(n_perc * sizeof(double));
+	else
+	    stats[i].perc = NULL;
+	stats[i].sum_abs = 0.0;
+	stats[i].n = 0;
+	stats[i].size = 0;
+	stats[i].dcell_array = NULL;
+	stats[i].fcell_array = NULL;
+	stats[i].cell_array = NULL;
+	stats[i].map_type = map_type;
+
+	stats[i].n_alloc = 0;
+
+	stats[i].first = TRUE;
+
+	/* allocate memory for extended computation */
+	/* changed to on-demand block allocation */
+
+/*	if (param.extended->answer) {
+	    if (map_type == DCELL_TYPE) {
+		stats[i].dcell_array = NULL;
+	    }
+	    else if (map_type == FCELL_TYPE) {
+		stats[i].fcell_array =NULL;
+	    }
+	    else if (map_type == CELL_TYPE) {
+		stats[i].cell_array = NULL;
+	    }
+	}
+*/
+
     }
 
     return stats;
@@ -60,15 +76,23 @@
 /* *************************************************************** */
 void free_univar_stat_struct(univar_stat * stats)
 {
-    if (stats->perc)
-	G_free(stats->perc);
-    if (stats->dcell_array)
-	G_free(stats->dcell_array);
-    if (stats->fcell_array)
-	G_free(stats->fcell_array);
-    if (stats->cell_array)
-	G_free(stats->cell_array);
+    int i;
+    int n_zones = zone_info.n_zones;
 
+    if (n_zones == 0)
+	n_zones = 1;
+
+    for (i = 0; i < n_zones; i++){
+	if (stats[i].perc)
+	    G_free(stats[i].perc);
+	if (stats[i].dcell_array)
+	    G_free(stats[i].dcell_array);
+	if (stats[i].fcell_array)
+	    G_free(stats[i].fcell_array);
+	if (stats[i].cell_array)
+	    G_free(stats[i].cell_array);
+    }
+
     G_free(stats);
 
     return;
@@ -80,187 +104,391 @@
 /* *************************************************************** */
 int print_stats(univar_stat * stats)
 {
-    char sum_str[100];
-    double mean, variance, stdev, var_coef;
+    int z, n_zones = zone_info.n_zones;
 
-    /*for extendet stats */
-    double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
-    double median = 0.0;
-    unsigned int i;
-    int qpos_25, qpos_75, *qpos_perc;
+    if (n_zones == 0)
+	n_zones = 1;
 
+    for (z = 0; z < n_zones; z++) {
+	char sum_str[100];
+	double mean, variance, stdev, var_coef;
 
-    /* all these calculations get promoted to doubles, so any DIV0 becomes nan */
-    mean = stats->sum / stats->n;
-    variance = (stats->sumsq - stats->sum * stats->sum / stats->n) / stats->n;
-    if (variance < GRASS_EPSILON)
-	variance = 0.0;
-    stdev = sqrt(variance);
-    var_coef = (stdev / mean) * 100.;	/* perhaps stdev/fabs(mean) ? */
+	/*for extendet stats */
+	double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
+	double median = 0.0;
+	unsigned int i;
+	int qpos_25, qpos_75, *qpos_perc;
 
-    sprintf(sum_str, "%.10f", stats->sum);
-    G_trim_decimal(sum_str);
+	/* stats collected for this zone? */
+	if (stats[z].n == 0)
+	    continue;
 
-    if (!param.shell_style->answer) {
-	fprintf(stdout, "total null and non-null cells: %d\n", stats->size);
-	fprintf(stdout, "total null cells: %d\n\n", stats->size - stats->n);
-	fprintf(stdout, "Of the non-null cells:\n----------------------\n");
-    }
+	/* all these calculations get promoted to doubles, so any DIV0 becomes nan */
+	mean = stats[z].sum / stats[z].n;
+	variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
+	if (variance < GRASS_EPSILON)
+	    variance = 0.0;
+	stdev = sqrt(variance);
+	var_coef = (stdev / mean) * 100.;	/* perhaps stdev/fabs(mean) ? */
 
-    if (param.shell_style->answer) {
-	fprintf(stdout, "n=%d\n", stats->n);
-	fprintf(stdout, "null_cells=%d\n", stats->size - stats->n);
-	fprintf(stdout, "cells=%d\n", stats->size);
-	fprintf(stdout, "min=%.15g\n", stats->min);
-	fprintf(stdout, "max=%.15g\n", stats->max);
-	fprintf(stdout, "range=%.15g\n", stats->max - stats->min);
-	fprintf(stdout, "mean=%.15g\n", mean);
-	fprintf(stdout, "mean_of_abs=%.15g\n", stats->sum_abs / stats->n);
-	fprintf(stdout, "stddev=%.15g\n", stdev);
-	fprintf(stdout, "variance=%.15g\n", variance);
-	fprintf(stdout, "coeff_var=%.15g\n", var_coef);
-	fprintf(stdout, "sum=%s\n", sum_str);
-    }
-    else {
-	fprintf(stdout, "n: %d\n", stats->n);
-	fprintf(stdout, "minimum: %g\n", stats->min);
-	fprintf(stdout, "maximum: %g\n", stats->max);
-	fprintf(stdout, "range: %g\n", stats->max - stats->min);
-	fprintf(stdout, "mean: %g\n", mean);
-	fprintf(stdout, "mean of absolute values: %g\n",
-		stats->sum_abs / stats->n);
-	fprintf(stdout, "standard deviation: %g\n", stdev);
-	fprintf(stdout, "variance: %g\n", variance);
-	fprintf(stdout, "variation coefficient: %g %%\n", var_coef);
-	fprintf(stdout, "sum: %s\n", sum_str);
-    }
+	sprintf(sum_str, "%.10f", stats[z].sum);
+	G_trim_decimal(sum_str);
 
+	if (zone_info.n_zones) {
+	    int z_cat = z + zone_info.min;
+	    
+	    fprintf(stdout, "\nzone %d %s\n\n", z_cat, Rast_get_c_cat(&z_cat, &(zone_info.cats)));
+	}
 
-    /* TODO: mode, skewness, kurtosis */
-    if (param.extended->answer) {
-	qpos_perc = (int *)G_calloc(stats->n_perc, sizeof(int));
-	quartile_perc = (double *)G_calloc(stats->n_perc, sizeof(double));
-	for (i = 0; i < stats->n_perc; i++) {
-	    qpos_perc[i] = (int)(stats->n * 1e-2 * stats->perc[i] - 0.5);
+	if (!param.shell_style->answer) {
+	    fprintf(stdout, "total null and non-null cells: %d\n", stats[z].size);
+	    fprintf(stdout, "total null cells: %d\n\n", stats[z].size - stats[z].n);
+	    fprintf(stdout, "Of the non-null cells:\n----------------------\n");
 	}
-	qpos_25 = (int)(stats->n * 0.25 - 0.5);
-	qpos_75 = (int)(stats->n * 0.75 - 0.5);
 
-	switch (stats->map_type) {
-	case CELL_TYPE:
-	    heapsort_int(stats->cell_array, stats->n);
+	if (param.shell_style->answer) {
+	    fprintf(stdout, "n=%d\n", stats[z].n);
+	    fprintf(stdout, "null_cells=%d\n", stats[z].size - stats[z].n);
+	    fprintf(stdout, "cells=%d\n", stats->size);
+	    fprintf(stdout, "min=%.15g\n", stats[z].min);
+	    fprintf(stdout, "max=%.15g\n", stats[z].max);
+	    fprintf(stdout, "range=%.15g\n", stats[z].max - stats[z].min);
+	    fprintf(stdout, "mean=%.15g\n", mean);
+	    fprintf(stdout, "mean_of_abs=%.15g\n", stats[z].sum_abs / stats[z].n);
+	    fprintf(stdout, "stddev=%.15g\n", stdev);
+	    fprintf(stdout, "variance=%.15g\n", variance);
+	    fprintf(stdout, "coeff_var=%.15g\n", var_coef);
+	    fprintf(stdout, "sum=%s\n", sum_str);
+	}
+	else {
+	    fprintf(stdout, "n: %d\n", stats[z].n);
+	    fprintf(stdout, "minimum: %g\n", stats[z].min);
+	    fprintf(stdout, "maximum: %g\n", stats[z].max);
+	    fprintf(stdout, "range: %g\n", stats[z].max - stats[z].min);
+	    fprintf(stdout, "mean: %g\n", mean);
+	    fprintf(stdout, "mean of absolute values: %g\n",
+		    stats[z].sum_abs / stats[z].n);
+	    fprintf(stdout, "standard deviation: %g\n", stdev);
+	    fprintf(stdout, "variance: %g\n", variance);
+	    fprintf(stdout, "variation coefficient: %g %%\n", var_coef);
+	    fprintf(stdout, "sum: %s\n", sum_str);
+	}
 
-	    quartile_25 = (double)stats->cell_array[qpos_25];
-	    if (stats->n % 2)	/* odd */
-		median = (double)stats->cell_array[(int)(stats->n / 2)];
-	    else		/* even */
-		median =
-		    (double)(stats->cell_array[stats->n / 2 - 1] +
-			     stats->cell_array[stats->n / 2]) / 2.0;
-	    quartile_75 = (double)stats->cell_array[qpos_75];
-	    for (i = 0; i < stats->n_perc; i++) {
-		quartile_perc[i] = (double)stats->cell_array[qpos_perc[i]];
+
+	/* TODO: mode, skewness, kurtosis */
+	if (param.extended->answer) {
+	    qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
+	    quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
+	    for (i = 0; i < stats[z].n_perc; i++) {
+		qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
 	    }
-	    break;
+	    qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
+	    qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
 
-	case FCELL_TYPE:
-	    heapsort_float(stats->fcell_array, stats->n);
+	    switch (stats[z].map_type) {
+	    case CELL_TYPE:
+		heapsort_int(stats[z].cell_array, stats[z].n);
 
-	    quartile_25 = (double)stats->fcell_array[qpos_25];
-	    if (stats->n % 2)	/* odd */
-		median = (double)stats->fcell_array[(int)(stats->n / 2)];
-	    else		/* even */
-		median =
-		    (double)(stats->fcell_array[stats->n / 2 - 1] +
-			     stats->fcell_array[stats->n / 2]) / 2.0;
-	    quartile_75 = (double)stats->fcell_array[qpos_75];
-	    for (i = 0; i < stats->n_perc; i++) {
-		quartile_perc[i] = (double)stats->fcell_array[qpos_perc[i]];
+		quartile_25 = (double)stats[z].cell_array[qpos_25];
+		if (stats[z].n % 2)	/* odd */
+		    median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
+		else		/* even */
+		    median =
+			(double)(stats[z].cell_array[stats[z].n / 2 - 1] +
+				 stats[z].cell_array[stats[z].n / 2]) / 2.0;
+		quartile_75 = (double)stats[z].cell_array[qpos_75];
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
+		}
+		break;
+
+	    case FCELL_TYPE:
+		heapsort_float(stats[z].fcell_array, stats[z].n);
+
+		quartile_25 = (double)stats[z].fcell_array[qpos_25];
+		if (stats[z].n % 2)	/* odd */
+		    median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
+		else		/* even */
+		    median =
+			(double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
+				 stats[z].fcell_array[stats[z].n / 2]) / 2.0;
+		quartile_75 = (double)stats[z].fcell_array[qpos_75];
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
+		}
+		break;
+
+	    case DCELL_TYPE:
+		heapsort_double(stats[z].dcell_array, stats[z].n);
+
+		quartile_25 = stats[z].dcell_array[qpos_25];
+		if (stats[z].n % 2)	/* odd */
+		    median = stats[z].dcell_array[(int)(stats[z].n / 2)];
+		else		/* even */
+		    median =
+			(stats[z].dcell_array[stats[z].n / 2 - 1] +
+			 stats[z].dcell_array[stats[z].n / 2]) / 2.0;
+		quartile_75 = stats[z].dcell_array[qpos_75];
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
+		}
+		break;
+
+	    default:
+		break;
 	    }
-	    break;
 
-	case DCELL_TYPE:
-	    heapsort_double(stats->dcell_array, stats->n);
+	    if (param.shell_style->answer) {
+		fprintf(stdout, "first_quartile=%g\n", quartile_25);
+		fprintf(stdout, "median=%g\n", median);
+		fprintf(stdout, "third_quartile=%g\n", quartile_75);
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    char buf[24];
+		    sprintf(buf, "%.15g", stats[z].perc[i]);
+		    G_strchg(buf, '.', '_');
+		    fprintf(stdout, "percentile_%s=%g\n", buf,
+			    quartile_perc[i]);
+		}
+	    }
+	    else {
+		fprintf(stdout, "1st quartile: %g\n", quartile_25);
+		if (stats[z].n % 2)
+		    fprintf(stdout, "median (odd number of cells): %g\n", median);
+		else
+		    fprintf(stdout, "median (even number of cells): %g\n",
+			    median);
+		fprintf(stdout, "3rd quartile: %g\n", quartile_75);
 
-	    quartile_25 = stats->dcell_array[qpos_25];
-	    if (stats->n % 2)	/* odd */
-		median = stats->dcell_array[(int)(stats->n / 2)];
-	    else		/* even */
-		median =
-		    (stats->dcell_array[stats->n / 2 - 1] +
-		     stats->dcell_array[stats->n / 2]) / 2.0;
-	    quartile_75 = stats->dcell_array[qpos_75];
-	    for (i = 0; i < stats->n_perc; i++) {
-		quartile_perc[i] = stats->dcell_array[qpos_perc[i]];
+
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    if (stats[z].perc[i] == (int)stats[z].perc[i]) {
+			/* percentile is an exact integer */
+			if ((int)stats[z].perc[i] % 10 == 1 && (int)stats[z].perc[i] != 11)
+			    fprintf(stdout, "%dst percentile: %g\n", (int)stats[z].perc[i],
+				    quartile_perc[i]);
+			else if ((int)stats[z].perc[i] % 10 == 2 && (int)stats[z].perc[i] != 12)
+			    fprintf(stdout, "%dnd percentile: %g\n", (int)stats[z].perc[i],
+				    quartile_perc[i]);
+			else if ((int)stats[z].perc[i] % 10 == 3 && (int)stats[z].perc[i] != 13)
+			    fprintf(stdout, "%drd percentile: %g\n", (int)stats[z].perc[i],
+				    quartile_perc[i]);
+			else
+			    fprintf(stdout, "%dth percentile: %g\n", (int)stats[z].perc[i],
+				    quartile_perc[i]);
+		    }
+		    else {
+			/* percentile is not an exact integer */
+			fprintf(stdout, "%.15g percentile: %g\n", stats[z].perc[i],
+				quartile_perc[i]);
+		    }
+		}
 	    }
-	    break;
-
-	default:
-	    break;
+	    G_free((void *)quartile_perc);
+	    G_free((void *)qpos_perc);
 	}
 
-	if (param.shell_style->answer) {
-	    fprintf(stdout, "first_quartile=%g\n", quartile_25);
-	    fprintf(stdout, "median=%g\n", median);
-	    fprintf(stdout, "third_quartile=%g\n", quartile_75);
-	    for (i = 0; i < stats->n_perc; i++) {
+	/* G_message() prints to stderr not stdout: disabled. this \n is printed above with zone */
+	/* if (!(param.shell_style->answer))
+	    G_message("\n"); */
+    }
+
+    return 1;
+}
+
+int print_stats_table(univar_stat * stats)
+{
+    unsigned int i;
+    int z, n_zones = zone_info.n_zones;
+
+    if (n_zones == 0)
+	n_zones = 1;
+
+    /* print column headers */
+
+    if (zone_info.n_zones) {
+	fprintf(stdout, "zone%s", zone_info.sep);
+	fprintf(stdout, "label%s", zone_info.sep);
+    }
+    fprintf(stdout, "non_null_cells%s", zone_info.sep);
+    fprintf(stdout, "null_cells%s", zone_info.sep);
+    fprintf(stdout, "min%s", zone_info.sep);
+    fprintf(stdout, "max%s", zone_info.sep);
+    fprintf(stdout, "range%s", zone_info.sep);
+    fprintf(stdout, "mean%s", zone_info.sep);
+    fprintf(stdout, "mean_of_abs%s", zone_info.sep);
+    fprintf(stdout, "stddev%s", zone_info.sep);
+    fprintf(stdout, "variance%s", zone_info.sep);
+    fprintf(stdout, "coeff_var%s", zone_info.sep);
+    fprintf(stdout, "sum%s", zone_info.sep);
+    fprintf(stdout, "sum_abs");
+
+    if (param.extended->answer) {
+	fprintf(stdout, "%sfirst_quart", zone_info.sep);
+	fprintf(stdout, "%smedian", zone_info.sep);
+	fprintf(stdout, "%sthird_quart", zone_info.sep);
+	for (i = 0; i < stats[0].n_perc; i++) {
+
+	    if (stats[0].perc[i] == (int)stats[0].perc[i]) {
+		/* percentile is an exact integer */
+		fprintf(stdout, "%sperc_%d", zone_info.sep, (int)stats[0].perc[i]);
+	    }
+	    else {
+		/* percentile is not an exact integer */
 		char buf[24];
-		sprintf(buf, "%.15g", stats->perc[i]);
+		sprintf(buf, "%.15g", stats[0].perc[i]);
 		G_strchg(buf, '.', '_');
-		fprintf(stdout, "percentile_%s=%g\n", buf, quartile_perc[i]);
+		fprintf(stdout, "%sperc_%s", zone_info.sep, buf);
 	    }
 	}
-	else {
-	    fprintf(stdout, "1st quartile: %g\n", quartile_25);
-	    if (stats->n % 2)
-		fprintf(stdout, "median (odd number of cells): %g\n", median);
-	    else
-		fprintf(stdout, "median (even number of cells): %g\n",
-			median);
-	    fprintf(stdout, "3rd quartile: %g\n", quartile_75);
+    }
+    fprintf(stdout, "\n");
 
+    /* print stats */
 
-	    for (i = 0; i < stats->n_perc; i++) {
-		if (stats->perc[i] == (int)stats->perc[i]) {
-		    /* percentile is an exact integer */
-		    if ((int)stats->perc[i] % 10 == 1 && (int)stats->perc[i] != 11)
-			fprintf(stdout, "%dst percentile: %g\n", (int)stats->perc[i],
-				quartile_perc[i]);
-		    else if ((int)stats->perc[i] % 10 == 2 && (int)stats->perc[i] != 12)
-			fprintf(stdout, "%dnd percentile: %g\n", (int)stats->perc[i],
-				quartile_perc[i]);
-		    else if ((int)stats->perc[i] % 10 == 3 && (int)stats->perc[i] != 13)
-			fprintf(stdout, "%drd percentile: %g\n", (int)stats->perc[i],
-				quartile_perc[i]);
-		    else
-			fprintf(stdout, "%dth percentile: %g\n", (int)stats->perc[i],
-				quartile_perc[i]);
+    for (z = 0; z < n_zones; z++) {
+	char sum_str[100];
+	double mean, variance, stdev, var_coef;
+
+	/*for extendet stats */
+	double quartile_25 = 0.0, quartile_75 = 0.0, *quartile_perc;
+	double median = 0.0;
+	int qpos_25, qpos_75, *qpos_perc;
+
+	/* stats collected for this zone? */
+	if (stats[z].n == 0)
+	    continue;
+
+	i = 0;
+
+	/* all these calculations get promoted to doubles, so any DIV0 becomes nan */
+	mean = stats[z].sum / stats[z].n;
+	variance = (stats[z].sumsq - stats[z].sum * stats[z].sum / stats[z].n) / stats[z].n;
+	if (variance < GRASS_EPSILON)
+	    variance = 0.0;
+	stdev = sqrt(variance);
+	var_coef = (stdev / mean) * 100.;	/* perhaps stdev/fabs(mean) ? */
+
+	if (zone_info.n_zones) {
+	    int z_cat = z + zone_info.min;
+	    /* zone number */
+	    fprintf(stdout, "%d%s", z + zone_info.min, zone_info.sep);
+	    /* zone label */
+	    fprintf(stdout,"%s%s", Rast_get_c_cat(&z_cat, &(zone_info.cats)), zone_info.sep);
+	}
+
+	/* non-null cells cells */
+	fprintf(stdout, "%d%s", stats[z].n, zone_info.sep);
+	/* null cells */
+	fprintf(stdout, "%d%s", stats[z].size - stats[z].n, zone_info.sep);
+	/* min */
+	fprintf(stdout, "%.15g%s", stats[z].min, zone_info.sep);
+	/* max */
+	fprintf(stdout, "%.15g%s", stats[z].max, zone_info.sep);
+	/* range */
+	fprintf(stdout, "%.15g%s", stats[z].max - stats[z].min, zone_info.sep);
+	/* mean */
+	fprintf(stdout, "%.15g%s", mean, zone_info.sep);
+	/* mean of abs */
+	fprintf(stdout, "%.15g%s", stats[z].sum_abs / stats[z].n, zone_info.sep);
+	/* stddev */
+	fprintf(stdout, "%.15g%s", stdev, zone_info.sep);
+	/* variance */
+	fprintf(stdout, "%.15g%s", variance, zone_info.sep);
+	/* coefficient of variance */
+	fprintf(stdout, "%.15g%s", var_coef, zone_info.sep);
+	/* sum */
+	sprintf(sum_str, "%.10f", stats[z].sum);
+	G_trim_decimal(sum_str);
+	fprintf(stdout, "%s%s", sum_str, zone_info.sep);
+	/* absolute sum */
+	sprintf(sum_str, "%.10f", stats[z].sum_abs);
+	G_trim_decimal(sum_str);
+	fprintf(stdout, "%s", sum_str);
+
+	/* TODO: mode, skewness, kurtosis */
+	if (param.extended->answer) {
+	    qpos_perc = (int *)G_calloc(stats[z].n_perc, sizeof(int));
+	    quartile_perc = (double *)G_calloc(stats[z].n_perc, sizeof(double));
+	    for (i = 0; i < stats[z].n_perc; i++) {
+		qpos_perc[i] = (int)(stats[z].n * 1e-2 * stats[z].perc[i] - 0.5);
+	    }
+	    qpos_25 = (int)(stats[z].n * 0.25 - 0.5);
+	    qpos_75 = (int)(stats[z].n * 0.75 - 0.5);
+
+	    switch (stats[z].map_type) {
+	    case CELL_TYPE:
+		heapsort_int(stats[z].cell_array, stats[z].n);
+
+		quartile_25 = (double)stats[z].cell_array[qpos_25];
+		if (stats[z].n % 2)	/* odd */
+		    median = (double)stats[z].cell_array[(int)(stats[z].n / 2)];
+		else		/* even */
+		    median =
+			(double)(stats[z].cell_array[stats[z].n / 2 - 1] +
+				 stats[z].cell_array[stats[z].n / 2]) / 2.0;
+		quartile_75 = (double)stats[z].cell_array[qpos_75];
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    quartile_perc[i] = (double)stats[z].cell_array[qpos_perc[i]];
 		}
-		else {
-		    /* percentile is not an exact integer */
-/*
-		    char buf[24], suffix[3];
-		    sprintf(buf, "%.15g", stats->perc[i]);
-		    if (buf[strlen(buf)-1] == '1')
-			strcpy(suffix, "st");
-		    else if (buf[strlen(buf)-1] == '2')
-			strcpy(suffix, "nd");
-		    else if (buf[strlen(buf)-1] == '3')
-			strcpy(suffix, "rd");
-		    else
-			strcpy(suffix, "th");
-*/
-		    fprintf(stdout, "%.15g percentile: %g\n", stats->perc[i],
-			    quartile_perc[i]);
+		break;
+
+	    case FCELL_TYPE:
+		heapsort_float(stats[z].fcell_array, stats[z].n);
+
+		quartile_25 = (double)stats[z].fcell_array[qpos_25];
+		if (stats[z].n % 2)	/* odd */
+		    median = (double)stats[z].fcell_array[(int)(stats[z].n / 2)];
+		else		/* even */
+		    median =
+			(double)(stats[z].fcell_array[stats[z].n / 2 - 1] +
+				 stats[z].fcell_array[stats[z].n / 2]) / 2.0;
+		quartile_75 = (double)stats[z].fcell_array[qpos_75];
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    quartile_perc[i] = (double)stats[z].fcell_array[qpos_perc[i]];
 		}
+		break;
+
+	    case DCELL_TYPE:
+		heapsort_double(stats[z].dcell_array, stats[z].n);
+
+		quartile_25 = stats[z].dcell_array[qpos_25];
+		if (stats[z].n % 2)	/* odd */
+		    median = stats[z].dcell_array[(int)(stats[z].n / 2)];
+		else		/* even */
+		    median =
+			(stats[z].dcell_array[stats[z].n / 2 - 1] +
+			 stats[z].dcell_array[stats[z].n / 2]) / 2.0;
+		quartile_75 = stats[z].dcell_array[qpos_75];
+		for (i = 0; i < stats[z].n_perc; i++) {
+		    quartile_perc[i] = stats[z].dcell_array[qpos_perc[i]];
+		}
+		break;
+
+	    default:
+		break;
 	    }
+
+	    /* first quartile */
+	    fprintf(stdout, "%s%g", zone_info.sep, quartile_25);
+	    /* median */
+	    fprintf(stdout, "%s%g", zone_info.sep, median);
+	    /* third quartile */
+	    fprintf(stdout, "%s%g", zone_info.sep, quartile_75);
+	    /* percentiles */
+	    for (i = 0; i < stats[z].n_perc; i++) {
+		fprintf(stdout, "%s%g", zone_info.sep , 
+			quartile_perc[i]);
+	    }
+
+	    G_free((void *)quartile_perc);
+	    G_free((void *)qpos_perc);
 	}
-	G_free((void *)quartile_perc);
-	G_free((void *)qpos_perc);
+
+	fprintf(stdout, "\n");
+
+	/* zone z finished */
+
     }
 
-    if (!(param.shell_style->answer))
-	G_message("\n");
-
     return 1;
 }



More information about the grass-commit mailing list