[GRASS-SVN] r68543 - grass/trunk/raster/r.clump
svn_grass at osgeo.org
svn_grass at osgeo.org
Mon May 30 15:01:21 PDT 2016
Author: mmetz
Date: 2016-05-30 15:01:21 -0700 (Mon, 30 May 2016)
New Revision: 68543
Modified:
grass/trunk/raster/r.clump/clump.c
grass/trunk/raster/r.clump/local_proto.h
grass/trunk/raster/r.clump/main.c
grass/trunk/raster/r.clump/r.clump.html
Log:
r.clump: +multi-band clumping, +fuzzy clumping
Modified: grass/trunk/raster/r.clump/clump.c
===================================================================
--- grass/trunk/raster/r.clump/clump.c 2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/clump.c 2016-05-30 22:01:21 UTC (rev 68543)
@@ -9,7 +9,7 @@
* PURPOSE: Recategorizes data in a raster map layer by grouping cells
* that form physically discrete areas into unique categories.
*
- * COPYRIGHT: (C) 2006-2014 by the GRASS Development Team
+ * COPYRIGHT: (C) 2006-2016 by the GRASS Development Team
*
* This program is free software under the GNU General Public
* License (>=v2). Read the file COPYING that comes with GRASS
@@ -131,7 +131,7 @@
cur_clump[col] = *temp_clump;
if (OLD == 0) {
OLD = *temp_clump;
- }
+ }
else {
NEW = *temp_clump;
break;
@@ -186,8 +186,7 @@
}
/* right of previous row from col + 1 to ncols */
- temp_clump = prev_clump;
- temp_clump += col;
+ temp_clump = prev_clump + col;
n = ncols - col;
while (n-- > 0) {
temp_clump++; /* skip col */
@@ -293,6 +292,367 @@
return 0;
}
+static int cmp_cells(DCELL **a, int acol, DCELL **b, int bcol,
+ DCELL *rng, int n, double threshold2)
+{
+ int i;
+ double diff, diff2;
+
+ diff2 = 0;
+ for (i = 0; i < n; i++) {
+ if (Rast_is_d_null_value(&b[i][bcol]))
+ return 0;
+ diff = a[i][acol] - b[i][bcol];
+ /* normalize with the band's range */
+ if (rng[i])
+ diff /= rng[i];
+ diff2 += diff * diff;
+ }
+ /* normalize difference to the range [0, 1] */
+ diff2 /= n;
+
+ return (diff2 <= threshold2);
+}
+
+CELL clump_n(int *in_fd, char **inname, int nin, double threshold, int out_fd, int diag, int print)
+{
+ register int col;
+ register int i, n;
+ /* input */
+ DCELL **prev_in, **cur_in, **temp_in;
+ int bcol;
+ DCELL *rng, maxdiff;
+ double thresh2;
+ /* output */
+ CELL OLD, NEW;
+ CELL *temp_clump, *out_cell, *out_cellp;
+ CELL *prev_clump, *cur_clump;
+ CELL *index, *renumber;
+ CELL label;
+ int nrows, ncols;
+ int row;
+ int isnull;
+ int len;
+ int nalloc;
+ long cur_time;
+ char *cname;
+ int cfd, csize;
+ CELL cat;
+
+ G_message(_("%d-band clumping with threshold %g"), nin, threshold);
+
+ nrows = Rast_window_rows();
+ ncols = Rast_window_cols();
+
+ thresh2 = threshold * threshold;
+
+ /* allocate clump index */
+ nalloc = INCR;
+ index = (CELL *) G_malloc(nalloc * sizeof(CELL));
+ index[0] = 0;
+ renumber = NULL;
+
+ /* allocate DCELL buffers two columns larger than current window */
+ len = (ncols + 2) * sizeof(DCELL);
+ prev_in = (DCELL **) G_malloc(sizeof(DCELL *) * nin);
+ cur_in = (DCELL **) G_malloc(sizeof(DCELL *) * nin);
+ rng = G_malloc(sizeof(DCELL) * nin);
+
+ maxdiff = 0;
+ for (i = 0; i < nin; i++) {
+ struct FPRange fp_range; /* min/max values of each input raster */
+ DCELL min, max;
+
+ if (Rast_read_fp_range(inname[i], "", &fp_range) != 1)
+ G_fatal_error(_("No min/max found in raster map <%s>"),
+ inname[i]);
+ Rast_get_fp_range_min_max(&fp_range, &min, &max);
+ rng[i] = max - min;
+ maxdiff += rng[i] * rng[i];
+
+ prev_in[i] = (DCELL *) G_malloc(len);
+ cur_in[i] = (DCELL *) G_malloc(len);
+
+ /* fake a previous row which is all NULL */
+ Rast_set_d_null_value(prev_in[i], ncols + 2);
+
+ /* set left and right edge to NULL */
+ Rast_set_d_null_value(&cur_in[i][0], 1);
+ Rast_set_d_null_value(&cur_in[i][ncols + 1], 1);
+ }
+ G_debug(1, "maximum possible difference: %g", maxdiff);
+
+ /* allocate CELL buffers two columns larger than current window */
+ len = (ncols + 2) * sizeof(CELL);
+ prev_clump = (CELL *) G_malloc(len);
+ cur_clump = (CELL *) G_malloc(len);
+ out_cell = (CELL *) G_malloc(len);
+
+ /* temp file for initial clump IDs */
+ cname = G_tempfile();
+ if ((cfd = open(cname, O_RDWR | O_CREAT | O_EXCL, 0600)) < 0)
+ G_fatal_error(_("Unable to open temp file"));
+ csize = ncols * sizeof(CELL);
+
+ time(&cur_time);
+
+ /* initialize clump labels */
+ G_zero(cur_clump, len);
+ G_zero(prev_clump, len);
+ label = 0;
+
+ /****************************************************
+ * PASS 1 *
+ * pass thru the input, create initial clump labels *
+ ****************************************************/
+
+ G_message(_("Pass 1 of 2..."));
+ for (row = 0; row < nrows; row++) {
+ G_percent(row, nrows, 2);
+ for (i = 0; i < nin; i++) {
+ Rast_get_d_row(in_fd[i], cur_in[i] + 1, row);
+ }
+
+ for (col = 1; col <= ncols; col++) {
+ isnull = 0;
+ for (i = 0; i < nin; i++) {
+ if (Rast_is_d_null_value(&cur_in[i][col])) { /* don't clump NULL data */
+ cur_clump[col] = 0;
+ isnull = 1;
+ break;
+ }
+ }
+ if (isnull)
+ continue;
+
+ /*
+ * if the cell values are different to the left and above
+ * (diagonal: and above left and above right)
+ * then we must start a new clump
+ *
+ * this new clump may eventually collide with another
+ * clump and will have to be merged
+ */
+
+ /* try to connect the current cell to an existing clump */
+ OLD = NEW = 0;
+ /* same clump as to the left */
+ if (cmp_cells(cur_in, col, cur_in, col - 1, rng, nin, thresh2)) {
+ OLD = cur_clump[col] = cur_clump[col - 1];
+ }
+
+ if (diag) {
+ /* check above right, center, left, in that order */
+ temp_clump = prev_clump + col + 1;
+ bcol = col + 1;
+ do {
+ if (cmp_cells(cur_in, col, prev_in, bcol, rng, nin, thresh2)) {
+ cur_clump[col] = *temp_clump;
+ if (OLD == 0) {
+ OLD = *temp_clump;
+ }
+ else {
+ NEW = *temp_clump;
+
+ /* threshold > 0 and diagonal requires a bit of extra work
+ * because of bridge cells:
+ * A similar to B, B similar to C, but A not similar to C
+ * -> B is bridge cell */
+ if (NEW != OLD) {
+ CELL *temp_clump2;
+
+ /* conflict! preserve NEW clump ID and change OLD clump ID.
+ * Must go back to the left in the current row and to the right
+ * in the previous row to change all the clump values as well.
+ */
+
+ /* left of the current row from 1 to col - 1 */
+ temp_clump2 = cur_clump;
+ n = col - 1;
+ while (n-- > 0) {
+ temp_clump2++; /* skip left edge */
+ if (*temp_clump2 == OLD)
+ *temp_clump2 = NEW;
+ }
+
+ /* right of previous row from col - 1 to ncols */
+ temp_clump2 = prev_clump + col - 1;
+ n = ncols - col + 2;
+ while (n-- > 0) {
+ if (*temp_clump2 == OLD)
+ *temp_clump2 = NEW;
+ temp_clump2++;
+ }
+
+ /* modify the OLD index */
+ index[OLD] = NEW;
+
+ OLD = NEW;
+ NEW = 0;
+ }
+ }
+ }
+ temp_clump--;
+ } while (bcol-- > col - 1);
+ }
+ else {
+ /* check above */
+ if (cmp_cells(cur_in, col, prev_in, col, rng, nin, thresh2)) {
+ temp_clump = prev_clump + col;
+ cur_clump[col] = *temp_clump;
+ if (OLD == 0) {
+ OLD = *temp_clump;
+ }
+ else {
+ NEW = *temp_clump;
+ if (NEW != OLD) {
+
+ /* conflict! preserve NEW clump ID and change OLD clump ID.
+ * Must go back to the left in the current row and to the right
+ * in the previous row to change all the clump values as well.
+ */
+
+ /* left of the current row from 1 to col - 1 */
+ temp_clump = cur_clump;
+ n = col - 1;
+ while (n-- > 0) {
+ temp_clump++; /* skip left edge */
+ if (*temp_clump == OLD)
+ *temp_clump = NEW;
+ }
+
+ /* right of previous row from col + 1 to ncols */
+ temp_clump = prev_clump + col;
+ n = ncols - col;
+ while (n-- > 0) {
+ temp_clump++; /* skip col */
+ if (*temp_clump == OLD)
+ *temp_clump = NEW;
+ }
+
+ /* modify the OLD index */
+ index[OLD] = NEW;
+
+ OLD = NEW;
+ NEW = 0;
+ }
+ }
+ }
+ }
+
+ if (NEW == 0 || OLD == NEW) { /* ok */
+ if (OLD == 0) {
+ /* start a new clump */
+ label++;
+ cur_clump[col] = label;
+ if (label >= nalloc) {
+ nalloc += INCR;
+ index =
+ (CELL *) G_realloc(index,
+ nalloc * sizeof(CELL));
+ }
+ index[label] = label;
+ }
+ }
+ /* else the relabelling above failed */
+ }
+
+ /* write initial clump IDs */
+ /* this works also with writing out cur_clump, but only
+ * prev_clump is complete and will not change any more */
+ if (row > 0) {
+ if (write(cfd, prev_clump + 1, csize) != csize)
+ G_fatal_error(_("Unable to write to temp file"));
+ }
+
+ /* switch the buffers so that the current buffer becomes the previous */
+ temp_in = cur_in;
+ cur_in = prev_in;
+ prev_in = temp_in;
+
+ temp_clump = cur_clump;
+ cur_clump = prev_clump;
+ prev_clump = temp_clump;
+ }
+ /* write last row with initial clump IDs */
+ if (write(cfd, prev_clump + 1, csize) != csize)
+ G_fatal_error(_("Unable to write to temp file"));
+ G_percent(1, 1, 1);
+
+ /* generate a renumbering scheme */
+ G_message(_("Generating renumbering scheme..."));
+ G_debug(1, "%d initial labels", label);
+ /* allocate final clump ID */
+ renumber = (CELL *) G_malloc((label + 1) * sizeof(CELL));
+ renumber[0] = 0;
+ cat = 1;
+ G_percent(0, label, 1);
+ for (n = 1; n <= label; n++) {
+ G_percent(n, label, 1);
+ OLD = n;
+ NEW = index[n];
+ if (OLD != NEW) {
+ renumber[n] = 0;
+ /* find valid clump ID */
+ while (OLD != NEW) {
+ OLD = NEW;
+ NEW = index[OLD];
+ if (NEW == n)
+ G_fatal_error("Circular relabelling for %d", n);
+ }
+ index[n] = NEW;
+ }
+ else
+ /* set final clump id */
+ renumber[n] = cat++;
+ }
+
+ /* rewind temp file */
+ lseek(cfd, 0, SEEK_SET);
+
+ if (print) {
+ fprintf(stdout, "clumps=%d\n", cat - 1);
+ }
+ else {
+ /****************************************************
+ * PASS 2 *
+ * apply renumbering scheme to initial clump labels *
+ ****************************************************/
+
+ /* the input raster is no longer needed,
+ * using instead the temp file with initial clump labels */
+
+ G_message(_("Pass 2 of 2..."));
+ for (row = 0; row < nrows; row++) {
+
+ G_percent(row, nrows, 2);
+
+ if (read(cfd, cur_clump, csize) != csize)
+ G_fatal_error(_("Unable to read from temp file"));
+
+ temp_clump = cur_clump;
+ out_cellp = out_cell;
+
+ for (col = 0; col < ncols; col++) {
+ *out_cellp = renumber[index[*temp_clump]];
+ if (*out_cellp == 0)
+ Rast_set_c_null_value(out_cellp, 1);
+ temp_clump++;
+ out_cellp++;
+ }
+ Rast_put_row(out_fd, out_cell, CELL_TYPE);
+ }
+ G_percent(1, 1, 1);
+ }
+
+ close(cfd);
+ unlink(cname);
+
+ print_time(&cur_time);
+
+ return 0;
+}
+
int print_time(long *start)
{
int hours, minutes, seconds;
Modified: grass/trunk/raster/r.clump/local_proto.h
===================================================================
--- grass/trunk/raster/r.clump/local_proto.h 2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/local_proto.h 2016-05-30 22:01:21 UTC (rev 68543)
@@ -21,6 +21,7 @@
/* clump.c */
CELL clump(int, int, int, int);
+CELL clump_n(int *, char **, int, double, int, int, int);
int print_time(long *);
/* main.c */
Modified: grass/trunk/raster/r.clump/main.c
===================================================================
--- grass/trunk/raster/r.clump/main.c 2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/main.c 2016-05-30 22:01:21 UTC (rev 68543)
@@ -9,7 +9,7 @@
* PURPOSE: Recategorizes data in a raster map layer by grouping cells
* that form physically discrete areas into unique categories.
*
- * COPYRIGHT: (C) 2006-2014 by the GRASS Development Team
+ * COPYRIGHT: (C) 2006-2016 by the GRASS Development Team
*
* This program is free software under the GNU General Public
* License (>=v2). Read the file COPYING that comes with GRASS
@@ -31,7 +31,9 @@
struct History hist;
CELL min, max;
int range_return, n_clumps;
- int in_fd, out_fd;
+ int *in_fd, out_fd;
+ int i, n;
+ double threshold;
char title[512];
char name[GNAME_MAX];
char *OUTPUT;
@@ -39,6 +41,7 @@
struct GModule *module;
struct Option *opt_in;
struct Option *opt_out;
+ struct Option *opt_thresh;
struct Option *opt_title;
struct Flag *flag_diag;
struct Flag *flag_print;
@@ -56,7 +59,7 @@
_("Recategorizes data in a raster map by grouping cells "
"that form physically discrete areas into unique categories.");
- opt_in = G_define_standard_option(G_OPT_R_INPUT);
+ opt_in = G_define_standard_option(G_OPT_R_INPUTS);
opt_out = G_define_standard_option(G_OPT_R_OUTPUT);
opt_out->required = NO;
@@ -67,6 +70,14 @@
opt_title->required = NO;
opt_title->description = _("Title for output raster map");
+ opt_thresh = G_define_option();
+ opt_thresh->key = "threshold";
+ opt_thresh->type = TYPE_DOUBLE;
+ opt_thresh->required = NO;
+ opt_thresh->answer = "0";
+ opt_thresh->label = _("Threshold to identify similar cells");
+ opt_thresh->description = _("Valid range: 0 = identical to < 1 = maximal difference");
+
flag_diag = G_define_flag();
flag_diag->key = 'd';
flag_diag->label = _("Clump also diagonal cells");
@@ -83,19 +94,37 @@
if (G_parser(argc, argv))
exit(EXIT_FAILURE);
- INPUT = opt_in->answer;
+ threshold = atof(opt_thresh->answer);
+ if (threshold < 0 || threshold >= 1)
+ G_fatal_error(_("Valid range for option <%s> is 0 <= value < 1"),
+ opt_thresh->key);
+
+ n = 0;
+ while (opt_in->answers[n])
+ n++;
+
+ in_fd = G_malloc(sizeof(int) * n);
+
+ for (i = 0; i < n; i++)
+ in_fd[i] = Rast_open_old(opt_in->answers[i], "");
+
+ INPUT = opt_in->answers[0];
strcpy(name, INPUT);
- in_fd = Rast_open_old(name, "");
-
+ OUTPUT = NULL;
+ out_fd = -1;
if (!flag_print->answer) {
OUTPUT = opt_out->answer;
out_fd = Rast_open_c_new(OUTPUT);
}
- clump(in_fd, out_fd, flag_diag->answer, flag_print->answer);
+ if (n == 1 && threshold == 0)
+ clump(in_fd[0], out_fd, flag_diag->answer, flag_print->answer);
+ else
+ clump_n(in_fd, opt_in->answers, n, threshold, out_fd, flag_diag->answer, flag_print->answer);
- Rast_close(in_fd);
+ for (i = 0; i < n; i++)
+ Rast_close(in_fd[i]);
if (!flag_print->answer) {
Rast_close(out_fd);
Modified: grass/trunk/raster/r.clump/r.clump.html
===================================================================
--- grass/trunk/raster/r.clump/r.clump.html 2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/r.clump.html 2016-05-30 22:01:21 UTC (rev 68543)
@@ -1,9 +1,9 @@
<h2>DESCRIPTION</h2>
-<em>r.clump</em> finds all areas of contiguous cell category values in
-the input raster map. NULL values in the input are ignored. It assigns
-a unique category value to each such area ("clump") in the
-resulting output raster map.
+<em>r.clump</em> finds all areas of contiguous cell category values
+(connected components) in the input raster map. NULL values in the
+input are ignored. It assigns a unique category value to each such area
+("clump") in the resulting output raster map.
<p>
Category distinctions in the input raster map are preserved. This
@@ -13,6 +13,25 @@
to <em>r.clump</em> to recategorize cells and reassign cell category
values.
+<p>
+<em>r.clump</em> can also perform "fuzzy" clumping where
+neighboring cells that are not identical but similar to each other are
+clumped together. Here, the spectral distance between two cells is
+scaled to the range [0, 1] and compared to the <em>threshold</em>
+value. Cells are clumped together if their spectral distance is ≤
+<em>threshold</em>. The result is very sensitive to this
+<em>threshold</em> value, a recommended start value is 0.01, then
+increasing or decreasing this value according to the desired output.
+
+<p>
+<em>r.clump</em> can also use multiple raster maps of any kind (CELL,
+FCELL, DCELL) as input. In this case, the spectral distance between
+cells is used to determine the similarity of two cells. This means that
+input maps must be metric: the difference cell 1 - cell 2 must make
+sense. Categorical maps, e.g. land cover, can not be used in this case.
+Examples for valid inpat maps are satellite imagery, vegetation
+indices, elevation, climatic parameters etc.
+
<h2>NOTES</h2>
By default, the resulting clumps are connected only by their four
@@ -50,6 +69,13 @@
r.report lakes_individual units=h
</pre></div>
+<p>
+Perform fuzzy clumping on Landsat 7 2002 imagery (North Carolina sample dataset)
+<div class="code"><pre>
+r.clump in=lsat7_2002_10,lsat7_2002_20,lsat7_2002_30,lsat7_2002_40,lsat7_2002_50,lsat7_2002_70 \
+ out=lsat7_2002_clump_4 threshold=0.045
+</pre></div>
+
<h2>SEE ALSO</h2>
<em>
More information about the grass-commit
mailing list