[GRASS-SVN] r69776 - grass/trunk/raster/r.stats.quantile

svn_grass at osgeo.org svn_grass at osgeo.org
Sat Nov 5 14:24:41 PDT 2016


Author: mmetz
Date: 2016-11-05 14:24:41 -0700 (Sat, 05 Nov 2016)
New Revision: 69776

Modified:
   grass/trunk/raster/r.stats.quantile/main.c
Log:
r.stats.quantile: fix memory violations, optimize for a large number of base categories

Modified: grass/trunk/raster/r.stats.quantile/main.c
===================================================================
--- grass/trunk/raster/r.stats.quantile/main.c	2016-11-05 21:21:47 UTC (rev 69775)
+++ grass/trunk/raster/r.stats.quantile/main.c	2016-11-05 21:24:41 UTC (rev 69776)
@@ -2,7 +2,8 @@
 /****************************************************************************
  *
  * MODULE:       r.stats.quantile
- * AUTHOR(S):    Glynn Clements <glynn gclements.plus.com> (original contributor),
+ * AUTHOR(S):    Glynn Clements <glynn gclements.plus.com> (original contributor)
+ *               Markus Metz: dynamic bins to reduce memory consumptions
  * PURPOSE:      Compute category or object oriented quantiles using two passes
  *
  *               This program is free software under the GNU General Public
@@ -18,12 +19,9 @@
 #include <grass/glocale.h>
 #include <grass/spawn.h>
 
-#define MAX_CATS 1000
-
 struct bin
 {
     unsigned long origin;
-    DCELL min, max;
     int base, count;
 };
 
@@ -32,6 +30,8 @@
     unsigned int *slots;
     unsigned long total;
     int num_values;
+    DCELL min, max, slot_size;
+    int num_slots;
     unsigned char *slot_bins;
     int num_bins;
     struct bin *bins;
@@ -43,7 +43,6 @@
 static DCELL *quants;
 static DCELL min, max;
 static int num_slots;
-static DCELL slot_size;
 
 static int rows, cols;
 
@@ -51,19 +50,27 @@
 static int num_cats;
 static struct basecat *basecats;
 
-static inline int get_slot(DCELL c)
+static inline int get_slot(struct basecat *bc, DCELL c)
 {
-    int i = (int)floor((c - min) / slot_size);
+    int i;
 
+    if (bc->num_slots == 0)
+	return -1;
+
+    i = (int)floor((c - bc->min) / bc->slot_size);
+
     if (i < 0)
 	i = 0;
-    if (i > num_slots - 1)
-	i = num_slots - 1;
+    if (i > bc->num_slots - 1)
+	i = bc->num_slots - 1;
     return i;
 }
 
 static inline double get_quantile(struct basecat *bc, int n)
 {
+    if (n >= num_quants)
+	return (double)bc->total + bc->total;
+
     return (double)bc->total * quants[n];
 }
 
@@ -71,35 +78,91 @@
 {
     CELL *basebuf = Rast_allocate_c_buf();
     DCELL *coverbuf = Rast_allocate_d_buf();
+    struct basecat *bc;
     int row, col;
+    int i;
+    int allnull;
 
     G_message(_("Computing histograms"));
 
+    for (i = 0; i < num_cats; i++) {
+	bc = &basecats[i];
+	bc->min = max;
+	bc->max = min;
+    }
+
+    allnull = 1;
     for (row = 0; row < rows; row++) {
+	G_percent(row, rows, 2);
+
 	Rast_get_c_row(basefile, basebuf, row);
 	Rast_get_d_row(coverfile, coverbuf, row);
 
 	for (col = 0; col < cols; col++) {
-	    struct basecat *bc;
-	    int i;
-
 	    if (Rast_is_c_null_value(&basebuf[col]))
 		continue;
 
 	    if (Rast_is_d_null_value(&coverbuf[col]))
 		continue;
 
-	    i = get_slot(coverbuf[col]);
+	    allnull = 0;
+
 	    bc = &basecats[basebuf[col] - cmin];
 
-	    bc->slots[i]++;
 	    bc->total++;
+
+	    if (bc->min > coverbuf[col])
+		bc->min = coverbuf[col];
+	    if (bc->max < coverbuf[col])
+		bc->max = coverbuf[col];
 	}
+    }
+    G_percent(rows, rows, 2);
 
+    if (allnull)
+	G_fatal_error(_("No cells found where both base and cover are not NULL"));
+
+    for (i = 0; i < num_cats; i++) {
+	bc = &basecats[i];
+
+	bc->num_slots = 0;
+	bc->slot_size = 0;
+
+	if (bc->max <= bc->min)
+	    continue;
+
+	bc->num_slots = 1;
+	if (bc->total * sizeof(DCELL) > num_slots * sizeof(struct bin))
+	    bc->num_slots = num_slots;
+
+	bc->slots = G_calloc(bc->num_slots, sizeof(unsigned int));
+	bc->slot_size = (bc->max - bc->min) / bc->num_slots;
+    }
+
+    for (row = 0; row < rows; row++) {
 	G_percent(row, rows, 2);
+
+	Rast_get_c_row(basefile, basebuf, row);
+	Rast_get_d_row(coverfile, coverbuf, row);
+
+	for (col = 0; col < cols; col++) {
+	    if (Rast_is_c_null_value(&basebuf[col]))
+		continue;
+
+	    if (Rast_is_d_null_value(&coverbuf[col]))
+		continue;
+
+	    bc = &basecats[basebuf[col] - cmin];
+
+	    if (bc->num_slots == 0)
+		continue;
+
+	    i = get_slot(bc, coverbuf[col]);
+	    bc->slots[i]++;
+	}
     }
+    G_percent(rows, rows, 2);
 
-    G_percent(rows, rows, 2);
     G_free(basebuf);
     G_free(coverbuf);
 }
@@ -119,16 +182,20 @@
 	unsigned long accum = 0;
 	int quant = 0;
 
+	if (bc->num_slots == 0)
+	    continue;
+
 	bc->bins = G_calloc(num_quants, sizeof(struct bin));
-	bc->slot_bins = G_calloc(num_slots, sizeof(unsigned char));
+	bc->slot_bins = G_calloc(bc->num_slots, sizeof(unsigned char));
 
 	next = get_quantile(bc, quant);
 
-	for (slot = 0; slot < num_slots; slot++) {
+	for (slot = 0; slot < bc->num_slots; slot++) {
 	    unsigned int count = bc->slots[slot];
 	    unsigned long accum2 = accum + count;
 
-	    if (accum2 > next) {
+	    if (accum2 > next ||
+	        (slot == bc->num_slots - 1 && accum2 == next)) {
 		struct bin *b = &bc->bins[bin];
 
 		bc->slot_bins[slot] = ++bin;
@@ -136,8 +203,6 @@
 		b->origin = accum;
 		b->base = num_values;
 		b->count = 0;
-		b->min = min + slot_size * slot;
-		b->max = min + slot_size * (slot + 1);
 
 		while (accum2 > next)
 		    next = get_quantile(bc, ++quant);
@@ -180,8 +245,12 @@
 	    if (Rast_is_d_null_value(&coverbuf[col]))
 		continue;
 
-	    i = get_slot(coverbuf[col]);
 	    bc = &basecats[basebuf[col] - cmin];
+
+	    if (bc->num_slots == 0)
+		continue;
+
+	    i = get_slot(bc, coverbuf[col]);
 	    if (!bc->slot_bins[i])
 		continue;
 
@@ -193,8 +262,8 @@
 
 	G_percent(row, rows, 2);
     }
+    G_percent(rows, rows, 2);
 
-    G_percent(rows, rows, 2);
     G_free(basebuf);
     G_free(coverbuf);
 }
@@ -221,6 +290,9 @@
 	struct basecat *bc = &basecats[cat];
 	int bin;
 
+	if (bc->num_slots == 0)
+	    continue;
+
 	G_free(bc->slot_bins);
 
 	for (bin = 0; bin < bc->num_bins; bin++) {
@@ -245,6 +317,9 @@
 	struct basecat *bc = &basecats[cat];
 	int quant;
 
+	if (bc->total == 0)
+	    continue;
+
 	for (quant = 0; quant < num_quants; quant++)
 	    printf("%d:%d:%f:%f\n", cmin + cat, quant, 100 * quants[quant], bc->quants[quant]);
     }
@@ -258,31 +333,43 @@
 
     for (cat = 0; cat < num_cats; cat++) {
 	struct basecat *bc = &basecats[cat];
-	struct bin *b = &bc->bins[0];
 	int quant;
 
+	if (bc->max < bc->min)
+	    continue;
+
 	bc->quants = G_malloc(num_quants * sizeof(DCELL));
 
-	for (quant = 0; quant < num_quants; quant++) {
-	    double next = get_quantile(bc, quant);
-	    double k, v;
-	    int i0, i1;
+	if (bc->max == bc->min) {
+	    for (quant = 0; quant < num_quants; quant++)
+		bc->quants[quant] = bc->min;
+	}
+	else {
+	    struct bin *b = &bc->bins[0];
 
-	    while (b->origin + b->count < next)
-		b++;
+	    for (quant = 0; quant < num_quants; quant++) {
+		double next = get_quantile(bc, quant);
+		double k, v;
+		int i0, i1;
 
-	    k = next - b->origin;
-	    i0 = (int)floor(k);
-	    i1 = (int)ceil(k);
+		while (b->origin + b->count < next)
+		    b++;
 
-	    if (i1 > b->count - 1)
-		i1 = b->count - 1;
+		k = next - b->origin;
+		i0 = (int)floor(k);
+		i1 = (int)ceil(k);
 
-	    v = (i0 == i1)
-		? bc->values[b->base + i0]
-		: bc->values[b->base + i0] * (i1 - k) + bc->values[b->base + i1] * (k - i0);
+		if (i0 > b->count - 1)
+		    i0 = b->count - 1;
+		if (i1 > b->count - 1)
+		    i1 = b->count - 1;
 
-	    bc->quants[quant] = v;
+		v = (i0 == i1)
+		    ? bc->values[b->base + i0]
+		    : bc->values[b->base + i0] * (i1 - k) + bc->values[b->base + i1] * (k - i0);
+
+		bc->quants[quant] = v;
+	    }
 	}
     }
 }
@@ -311,8 +398,10 @@
 	if (!fp)
 	    G_fatal_error(_("Unable to open temporary file"));
 
-	for (cat = 0; cat < num_cats; cat++)
-	    fprintf(fp, "%d = %d %f\n", cmin + cat, cmin + cat, basecats[cat].quants[quant]);
+	for (cat = 0; cat < num_cats; cat++) {
+	    if (basecats[cat].total > 0)
+		fprintf(fp, "%d = %d %f\n", cmin + cat, cmin + cat, basecats[cat].quants[quant]);
+	}
 
 	fclose(fp);
 
@@ -351,6 +440,8 @@
 	    for (col = 0; col < cols; col++)
 		if (Rast_is_c_null_value(&base_buf[col]))
 		    Rast_set_d_null_value(&out_buf[col], 1);
+		else if (basecats[base_buf[col] - cmin].total == 0)
+		    Rast_set_d_null_value(&out_buf[col], 1);
 		else
 		    out_buf[col] = basecats[base_buf[col] - cmin].quants[quant];
 
@@ -491,28 +582,20 @@
 
     Rast_get_range_min_max(&range, &cmin, &cmax);
     num_cats = cmax - cmin + 1;
-    if (num_cats > MAX_CATS)
-	G_fatal_error(_("Base map <%s> has too many categories (max: %d)"),
-		      basemap, MAX_CATS);
+    if (num_cats > 100000)
+	G_warning(_("Base map <%s> has many categories (%d), computation might be slow and might need a lot of memory"),
+		  basemap, num_cats);
 
     Rast_read_fp_range(covermap, "", &fprange);
     Rast_get_fp_range_min_max(&fprange, &min, &max);
-    slot_size = (max - min) / num_slots;
 
     basecats = G_calloc(num_cats, sizeof(struct basecat));
-
-    for (i = 0; i < num_cats; i++)
-	basecats[i].slots = G_calloc(num_slots, sizeof(unsigned int));
-
     rows = Rast_window_rows();
     cols = Rast_window_cols();
 
     get_slot_counts(base_fd, cover_fd);
-
     initialize_bins();
     fill_bins(base_fd, cover_fd);
-
-
     sort_bins();
     compute_quantiles();
 



More information about the grass-commit mailing list