[GRASS-SVN] r70394 - grass/branches/releasebranch_7_2/raster/r.stats.quantile

svn_grass at osgeo.org svn_grass at osgeo.org
Wed Jan 18 07:56:07 PST 2017


Author: mmetz
Date: 2017-01-18 07:56:07 -0800 (Wed, 18 Jan 2017)
New Revision: 70394

Modified:
   grass/branches/releasebranch_7_2/raster/r.stats.quantile/main.c
   grass/branches/releasebranch_7_2/raster/r.stats.quantile/r.stats.quantile.html
Log:
r.stats.quantile: backport from trunk r69776, 69777, 69782

Modified: grass/branches/releasebranch_7_2/raster/r.stats.quantile/main.c
===================================================================
--- grass/branches/releasebranch_7_2/raster/r.stats.quantile/main.c	2017-01-17 22:32:30 UTC (rev 70393)
+++ grass/branches/releasebranch_7_2/raster/r.stats.quantile/main.c	2017-01-18 15:56:07 UTC (rev 70394)
@@ -2,7 +2,8 @@
 /****************************************************************************
  *
  * MODULE:       r.stats.quantile
- * AUTHOR(S):    Glynn Clements <glynn gclements.plus.com> (original contributor),
+ * AUTHOR(S):    Glynn Clements <glynn gclements.plus.com> (original contributor)
+ *               Markus Metz: dynamic bins to reduce memory consumptions
  * PURPOSE:      Compute category or object oriented quantiles using two passes
  *
  *               This program is free software under the GNU General Public
@@ -18,12 +19,9 @@
 #include <grass/glocale.h>
 #include <grass/spawn.h>
 
-#define MAX_CATS 1000
-
 struct bin
 {
     unsigned long origin;
-    DCELL min, max;
     int base, count;
 };
 
@@ -32,6 +30,8 @@
     unsigned int *slots;
     unsigned long total;
     int num_values;
+    DCELL min, max, slot_size;
+    int num_slots;
     unsigned char *slot_bins;
     int num_bins;
     struct bin *bins;
@@ -43,7 +43,6 @@
 static DCELL *quants;
 static DCELL min, max;
 static int num_slots;
-static DCELL slot_size;
 
 static int rows, cols;
 
@@ -51,19 +50,27 @@
 static int num_cats;
 static struct basecat *basecats;
 
-static inline int get_slot(DCELL c)
+static inline int get_slot(struct basecat *bc, DCELL c)
 {
-    int i = (int)floor((c - min) / slot_size);
+    int i;
 
+    if (bc->num_slots == 0)
+	return -1;
+
+    i = (int)floor((c - bc->min) / bc->slot_size);
+
     if (i < 0)
 	i = 0;
-    if (i > num_slots - 1)
-	i = num_slots - 1;
+    if (i > bc->num_slots - 1)
+	i = bc->num_slots - 1;
     return i;
 }
 
 static inline double get_quantile(struct basecat *bc, int n)
 {
+    if (n >= num_quants)
+	return (double)bc->total + bc->total;
+
     return (double)bc->total * quants[n];
 }
 
@@ -71,35 +78,91 @@
 {
     CELL *basebuf = Rast_allocate_c_buf();
     DCELL *coverbuf = Rast_allocate_d_buf();
+    struct basecat *bc;
     int row, col;
+    int i;
+    int allnull;
 
     G_message(_("Computing histograms"));
 
+    for (i = 0; i < num_cats; i++) {
+	bc = &basecats[i];
+	bc->min = max;
+	bc->max = min;
+    }
+
+    allnull = 1;
     for (row = 0; row < rows; row++) {
+	G_percent(row, rows, 2);
+
 	Rast_get_c_row(basefile, basebuf, row);
 	Rast_get_d_row(coverfile, coverbuf, row);
 
 	for (col = 0; col < cols; col++) {
-	    struct basecat *bc;
-	    int i;
-
 	    if (Rast_is_c_null_value(&basebuf[col]))
 		continue;
 
 	    if (Rast_is_d_null_value(&coverbuf[col]))
 		continue;
 
-	    i = get_slot(coverbuf[col]);
+	    allnull = 0;
+
 	    bc = &basecats[basebuf[col] - cmin];
 
-	    bc->slots[i]++;
 	    bc->total++;
+
+	    if (bc->min > coverbuf[col])
+		bc->min = coverbuf[col];
+	    if (bc->max < coverbuf[col])
+		bc->max = coverbuf[col];
 	}
+    }
+    G_percent(rows, rows, 2);
 
+    if (allnull)
+	G_fatal_error(_("No cells found where both base and cover are not NULL"));
+
+    for (i = 0; i < num_cats; i++) {
+	bc = &basecats[i];
+
+	bc->num_slots = 0;
+	bc->slot_size = 0;
+
+	if (bc->max <= bc->min)
+	    continue;
+
+	bc->num_slots = 1;
+	if (bc->total * 10 > (unsigned long) num_slots)
+	    bc->num_slots = num_slots;
+
+	bc->slots = G_calloc(bc->num_slots, sizeof(unsigned int));
+	bc->slot_size = (bc->max - bc->min) / bc->num_slots;
+    }
+
+    for (row = 0; row < rows; row++) {
 	G_percent(row, rows, 2);
+
+	Rast_get_c_row(basefile, basebuf, row);
+	Rast_get_d_row(coverfile, coverbuf, row);
+
+	for (col = 0; col < cols; col++) {
+	    if (Rast_is_c_null_value(&basebuf[col]))
+		continue;
+
+	    if (Rast_is_d_null_value(&coverbuf[col]))
+		continue;
+
+	    bc = &basecats[basebuf[col] - cmin];
+
+	    if (bc->num_slots == 0)
+		continue;
+
+	    i = get_slot(bc, coverbuf[col]);
+	    bc->slots[i]++;
+	}
     }
+    G_percent(rows, rows, 2);
 
-    G_percent(rows, rows, 2);
     G_free(basebuf);
     G_free(coverbuf);
 }
@@ -119,16 +182,20 @@
 	unsigned long accum = 0;
 	int quant = 0;
 
+	if (bc->num_slots == 0)
+	    continue;
+
 	bc->bins = G_calloc(num_quants, sizeof(struct bin));
-	bc->slot_bins = G_calloc(num_slots, sizeof(unsigned char));
+	bc->slot_bins = G_calloc(bc->num_slots, sizeof(unsigned char));
 
 	next = get_quantile(bc, quant);
 
-	for (slot = 0; slot < num_slots; slot++) {
+	for (slot = 0; slot < bc->num_slots; slot++) {
 	    unsigned int count = bc->slots[slot];
 	    unsigned long accum2 = accum + count;
 
-	    if (accum2 > next) {
+	    if (accum2 > next ||
+	        (slot == bc->num_slots - 1 && accum2 == next)) {
 		struct bin *b = &bc->bins[bin];
 
 		bc->slot_bins[slot] = ++bin;
@@ -136,8 +203,6 @@
 		b->origin = accum;
 		b->base = num_values;
 		b->count = 0;
-		b->min = min + slot_size * slot;
-		b->max = min + slot_size * (slot + 1);
 
 		while (accum2 > next)
 		    next = get_quantile(bc, ++quant);
@@ -180,8 +245,12 @@
 	    if (Rast_is_d_null_value(&coverbuf[col]))
 		continue;
 
-	    i = get_slot(coverbuf[col]);
 	    bc = &basecats[basebuf[col] - cmin];
+
+	    if (bc->num_slots == 0)
+		continue;
+
+	    i = get_slot(bc, coverbuf[col]);
 	    if (!bc->slot_bins[i])
 		continue;
 
@@ -193,8 +262,8 @@
 
 	G_percent(row, rows, 2);
     }
+    G_percent(rows, rows, 2);
 
-    G_percent(rows, rows, 2);
     G_free(basebuf);
     G_free(coverbuf);
 }
@@ -221,6 +290,9 @@
 	struct basecat *bc = &basecats[cat];
 	int bin;
 
+	if (bc->num_slots == 0)
+	    continue;
+
 	G_free(bc->slot_bins);
 
 	for (bin = 0; bin < bc->num_bins; bin++) {
@@ -235,19 +307,50 @@
     G_percent(cat, num_cats, 2);
 }
 
-static void print_quantiles(void)
+static void print_quantiles(char *fs, char *name, int table_frmt)
 {
-    int cat;
+    int cat, quant;
+    struct basecat *bc;
 
     G_message(_("Printing quantiles"));
 
-    for (cat = 0; cat < num_cats; cat++) {
-	struct basecat *bc = &basecats[cat];
-	int quant;
+    if (name != NULL && strcmp(name, "-") != 0) {
+	if (NULL == freopen(name, "w", stdout)) {
+	    G_fatal_error(_("Unable to open file <%s> for writing"), name);
+	}
+    }
 
+    if (!table_frmt) {
+	for (cat = 0; cat < num_cats; cat++) {
+	    bc = &basecats[cat];
+
+	    if (bc->total == 0)
+		continue;
+
+	    for (quant = 0; quant < num_quants; quant++)
+		fprintf(stdout, "%d%s%d%s%f%s%f\n", cmin + cat, fs, quant, fs,
+		       100 * quants[quant], fs, bc->quants[quant]);
+	}
+    }
+    else {
+	fprintf(stdout, "cat");
 	for (quant = 0; quant < num_quants; quant++)
-	    printf("%d:%d:%f:%f\n", cmin + cat, quant, 100 * quants[quant], bc->quants[quant]);
+	    fprintf(stdout, "%s%f", fs, 100 * quants[quant]);
+	fprintf(stdout, "\n");
+
+	for (cat = 0; cat < num_cats; cat++) {
+	    bc = &basecats[cat];
+
+	    if (bc->total == 0)
+		continue;
+
+	    fprintf(stdout, "%d", cmin + cat);
+	    for (quant = 0; quant < num_quants; quant++)
+		fprintf(stdout, "%s%f", fs, bc->quants[quant]);
+	    fprintf(stdout, "\n");
+	}
     }
+
 }
 
 static void compute_quantiles(void)
@@ -258,31 +361,43 @@
 
     for (cat = 0; cat < num_cats; cat++) {
 	struct basecat *bc = &basecats[cat];
-	struct bin *b = &bc->bins[0];
 	int quant;
 
+	if (bc->max < bc->min)
+	    continue;
+
 	bc->quants = G_malloc(num_quants * sizeof(DCELL));
 
-	for (quant = 0; quant < num_quants; quant++) {
-	    double next = get_quantile(bc, quant);
-	    double k, v;
-	    int i0, i1;
+	if (bc->max == bc->min) {
+	    for (quant = 0; quant < num_quants; quant++)
+		bc->quants[quant] = bc->min;
+	}
+	else {
+	    struct bin *b = &bc->bins[0];
 
-	    while (b->origin + b->count < next)
-		b++;
+	    for (quant = 0; quant < num_quants; quant++) {
+		double next = get_quantile(bc, quant);
+		double k, v;
+		int i0, i1;
 
-	    k = next - b->origin;
-	    i0 = (int)floor(k);
-	    i1 = (int)ceil(k);
+		while (b->origin + b->count < next)
+		    b++;
 
-	    if (i1 > b->count - 1)
-		i1 = b->count - 1;
+		k = next - b->origin;
+		i0 = (int)floor(k);
+		i1 = (int)ceil(k);
 
-	    v = (i0 == i1)
-		? bc->values[b->base + i0]
-		: bc->values[b->base + i0] * (i1 - k) + bc->values[b->base + i1] * (k - i0);
+		if (i0 > b->count - 1)
+		    i0 = b->count - 1;
+		if (i1 > b->count - 1)
+		    i1 = b->count - 1;
 
-	    bc->quants[quant] = v;
+		v = (i0 == i1)
+		    ? bc->values[b->base + i0]
+		    : bc->values[b->base + i0] * (i1 - k) + bc->values[b->base + i1] * (k - i0);
+
+		bc->quants[quant] = v;
+	    }
 	}
     }
 }
@@ -311,8 +426,10 @@
 	if (!fp)
 	    G_fatal_error(_("Unable to open temporary file"));
 
-	for (cat = 0; cat < num_cats; cat++)
-	    fprintf(fp, "%d = %d %f\n", cmin + cat, cmin + cat, basecats[cat].quants[quant]);
+	for (cat = 0; cat < num_cats; cat++) {
+	    if (basecats[cat].total > 0)
+		fprintf(fp, "%d = %d %f\n", cmin + cat, cmin + cat, basecats[cat].quants[quant]);
+	}
 
 	fclose(fp);
 
@@ -351,6 +468,8 @@
 	    for (col = 0; col < cols; col++)
 		if (Rast_is_c_null_value(&base_buf[col]))
 		    Rast_set_d_null_value(&out_buf[col], 1);
+		else if (basecats[base_buf[col] - cmin].total == 0)
+		    Rast_set_d_null_value(&out_buf[col], 1);
 		else
 		    out_buf[col] = basecats[base_buf[col] - cmin].quants[quant];
 
@@ -377,13 +496,14 @@
     struct GModule *module;
     struct
     {
-	struct Option *quant, *perc, *slots, *basemap, *covermap, *output;
+	struct Option *quant, *perc, *slots, *basemap, *covermap,
+	              *output, *file, *fs;
     } opt;
     struct {
-	struct Flag *r, *p;
+	struct Flag *r, *p, *t;
     } flag;
     const char *basemap, *covermap;
-    char **outputs;
+    char **outputs, *fs;
     int reclass, print;
     int cover_fd, base_fd;
     struct Range range;
@@ -428,6 +548,16 @@
     opt.output->required = NO;
     opt.output->multiple = YES;
 
+    opt.file = G_define_standard_option(G_OPT_F_OUTPUT);
+    opt.file->key = "file";
+    opt.file->required = NO;
+    opt.file->description =
+	_("Name for output file (if omitted or \"-\" output to stdout)");
+
+    opt.fs = G_define_standard_option(G_OPT_F_SEP);
+    opt.fs->answer = ":";
+    opt.fs->guisection = _("Formatting");
+
     flag.r = G_define_flag();
     flag.r->key = 'r';
     flag.r->description =
@@ -438,6 +568,11 @@
     flag.p->description =
 	_("Do not create output maps; just print statistics");
 
+    flag.t = G_define_flag();
+    flag.t->key = 't';
+    flag.t->description =
+	_("Print statistics in table format");
+
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
@@ -445,7 +580,7 @@
     covermap = opt.covermap->answer;
     outputs = opt.output->answers;
     reclass = flag.r->answer;
-    print = flag.p->answer;
+    print = flag.p->answer || flag.t->answer;
 
     if (!print && !opt.output->answers)
 	G_fatal_error(_("Either -%c or %s= must be given"),
@@ -493,33 +628,29 @@
 
     Rast_get_range_min_max(&range, &cmin, &cmax);
     num_cats = cmax - cmin + 1;
-    if (num_cats > MAX_CATS)
-	G_fatal_error(_("Base map <%s> has too many categories (max: %d)"),
-		      basemap, MAX_CATS);
+    if (num_cats > 100000)
+	G_warning(_("Base map <%s> has many categories (%d), computation might be slow and might need a lot of memory"),
+		  basemap, num_cats);
 
     Rast_read_fp_range(covermap, "", &fprange);
     Rast_get_fp_range_min_max(&fprange, &min, &max);
-    slot_size = (max - min) / num_slots;
 
     basecats = G_calloc(num_cats, sizeof(struct basecat));
-
-    for (i = 0; i < num_cats; i++)
-	basecats[i].slots = G_calloc(num_slots, sizeof(unsigned int));
-
     rows = Rast_window_rows();
     cols = Rast_window_cols();
 
     get_slot_counts(base_fd, cover_fd);
-
     initialize_bins();
     fill_bins(base_fd, cover_fd);
-
-
     sort_bins();
     compute_quantiles();
 
-    if (print)
-	print_quantiles();
+    if (print) {
+	/* get field separator */
+	fs = G_option_to_separator(opt.fs);
+	
+	print_quantiles(fs, opt.file->answer, flag.t->answer);
+    }
     else if (reclass)
 	do_reclass(basemap, outputs);
     else

Modified: grass/branches/releasebranch_7_2/raster/r.stats.quantile/r.stats.quantile.html
===================================================================
--- grass/branches/releasebranch_7_2/raster/r.stats.quantile/r.stats.quantile.html	2017-01-17 22:32:30 UTC (rev 70393)
+++ grass/branches/releasebranch_7_2/raster/r.stats.quantile/r.stats.quantile.html	2017-01-18 15:56:07 UTC (rev 70394)
@@ -24,7 +24,8 @@
 
 <h2>AUTHOR</h2>
 
-Glynn Clements
+Glynn Clements<br>
+Markus Metz
 
 <p>
 <i>Last changed: $Date$</i>



More information about the grass-commit mailing list