[GRASS-SVN] r68543 - grass/trunk/raster/r.clump

svn_grass at osgeo.org svn_grass at osgeo.org
Mon May 30 15:01:21 PDT 2016


Author: mmetz
Date: 2016-05-30 15:01:21 -0700 (Mon, 30 May 2016)
New Revision: 68543

Modified:
   grass/trunk/raster/r.clump/clump.c
   grass/trunk/raster/r.clump/local_proto.h
   grass/trunk/raster/r.clump/main.c
   grass/trunk/raster/r.clump/r.clump.html
Log:
r.clump: +multi-band clumping, +fuzzy clumping

Modified: grass/trunk/raster/r.clump/clump.c
===================================================================
--- grass/trunk/raster/r.clump/clump.c	2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/clump.c	2016-05-30 22:01:21 UTC (rev 68543)
@@ -9,7 +9,7 @@
  * PURPOSE:      Recategorizes data in a raster map layer by grouping cells
  *               that form physically discrete areas into unique categories.
  *
- * COPYRIGHT:    (C) 2006-2014 by the GRASS Development Team
+ * COPYRIGHT:    (C) 2006-2016 by the GRASS Development Team
  *
  *               This program is free software under the GNU General Public
  *               License (>=v2). Read the file COPYING that comes with GRASS
@@ -131,7 +131,7 @@
 			cur_clump[col] = *temp_clump;
 			if (OLD == 0) {
 			    OLD = *temp_clump;
-			    }
+			}
 			else {
 			    NEW = *temp_clump;
 			    break;
@@ -186,8 +186,7 @@
 	    }
 
 	    /* right of previous row from col + 1 to ncols */
-	    temp_clump = prev_clump;
-	    temp_clump += col;
+	    temp_clump = prev_clump + col;
 	    n = ncols - col;
 	    while (n-- > 0) {
 		temp_clump++;	/* skip col */
@@ -293,6 +292,367 @@
     return 0;
 }
 
+static int cmp_cells(DCELL **a, int acol, DCELL **b, int bcol,
+                     DCELL *rng, int n, double threshold2)
+{
+    int i;
+    double diff, diff2;
+
+    diff2 = 0;
+    for (i = 0; i < n; i++) {
+	if (Rast_is_d_null_value(&b[i][bcol]))
+	    return 0;
+	diff = a[i][acol] - b[i][bcol];
+	/* normalize with the band's range */
+	if (rng[i])
+	    diff /= rng[i];
+	diff2 += diff * diff;
+    }
+    /* normalize difference to the range [0, 1] */
+    diff2 /= n;
+    
+    return (diff2 <= threshold2);
+}
+
+CELL clump_n(int *in_fd, char **inname, int nin, double threshold, int out_fd, int diag, int print)
+{
+    register int col;
+    register int i, n;
+    /* input */
+    DCELL **prev_in, **cur_in, **temp_in;
+    int bcol;
+    DCELL *rng, maxdiff;
+    double thresh2;
+    /* output */
+    CELL OLD, NEW;
+    CELL *temp_clump, *out_cell, *out_cellp;
+    CELL *prev_clump, *cur_clump;
+    CELL *index, *renumber;
+    CELL label;
+    int nrows, ncols;
+    int row;
+    int isnull;
+    int len;
+    int nalloc;
+    long cur_time;
+    char *cname;
+    int cfd, csize;
+    CELL cat;
+
+    G_message(_("%d-band clumping with threshold %g"), nin, threshold);
+
+    nrows = Rast_window_rows();
+    ncols = Rast_window_cols();
+
+    thresh2 = threshold * threshold;
+
+    /* allocate clump index */
+    nalloc = INCR;
+    index = (CELL *) G_malloc(nalloc * sizeof(CELL));
+    index[0] = 0;
+    renumber = NULL;
+
+    /* allocate DCELL buffers two columns larger than current window */
+    len = (ncols + 2) * sizeof(DCELL);
+    prev_in = (DCELL **) G_malloc(sizeof(DCELL *) * nin);
+    cur_in = (DCELL **) G_malloc(sizeof(DCELL *) * nin);
+    rng = G_malloc(sizeof(DCELL) * nin);
+
+    maxdiff = 0;
+    for (i = 0; i < nin; i++) {
+	struct FPRange fp_range;	/* min/max values of each input raster */
+	DCELL min, max;
+
+	if (Rast_read_fp_range(inname[i], "", &fp_range) != 1)
+	    G_fatal_error(_("No min/max found in raster map <%s>"),
+			  inname[i]);
+	Rast_get_fp_range_min_max(&fp_range, &min, &max);
+	rng[i] = max - min;
+	maxdiff += rng[i] * rng[i];
+
+	prev_in[i] = (DCELL *) G_malloc(len);
+	cur_in[i] = (DCELL *) G_malloc(len);
+
+	/* fake a previous row which is all NULL */
+	Rast_set_d_null_value(prev_in[i], ncols + 2);
+
+	/* set left and right edge to NULL */
+	Rast_set_d_null_value(&cur_in[i][0], 1);
+	Rast_set_d_null_value(&cur_in[i][ncols + 1], 1);
+    }
+    G_debug(1, "maximum possible difference: %g", maxdiff);
+
+    /* allocate CELL buffers two columns larger than current window */
+    len = (ncols + 2) * sizeof(CELL);
+    prev_clump = (CELL *) G_malloc(len);
+    cur_clump = (CELL *) G_malloc(len);
+    out_cell = (CELL *) G_malloc(len);
+
+    /* temp file for initial clump IDs */
+    cname = G_tempfile();
+    if ((cfd = open(cname, O_RDWR | O_CREAT | O_EXCL, 0600)) < 0)
+	G_fatal_error(_("Unable to open temp file"));
+    csize = ncols * sizeof(CELL);
+
+    time(&cur_time);
+
+    /* initialize clump labels */
+    G_zero(cur_clump, len);
+    G_zero(prev_clump, len);
+    label = 0;
+
+    /****************************************************
+     *                      PASS 1                      *
+     * pass thru the input, create initial clump labels *
+     ****************************************************/
+
+    G_message(_("Pass 1 of 2..."));
+    for (row = 0; row < nrows; row++) {
+	G_percent(row, nrows, 2);
+	for (i = 0; i < nin; i++) {
+	    Rast_get_d_row(in_fd[i], cur_in[i] + 1, row);
+	}
+
+	for (col = 1; col <= ncols; col++) {
+	    isnull = 0;
+	    for (i = 0; i < nin; i++) {
+		if (Rast_is_d_null_value(&cur_in[i][col])) {	/* don't clump NULL data */
+		    cur_clump[col] = 0;
+		    isnull = 1;
+		    break;
+		}
+	    }
+	    if (isnull)
+		continue;
+
+	    /*
+	     * if the cell values are different to the left and above
+	     * (diagonal: and above left and above right)
+	     * then we must start a new clump
+	     *
+	     * this new clump may eventually collide with another
+	     * clump and will have to be merged
+	     */
+
+	    /* try to connect the current cell to an existing clump */
+	    OLD = NEW = 0;
+	    /* same clump as to the left */
+	    if (cmp_cells(cur_in, col, cur_in, col - 1, rng, nin, thresh2)) {
+		OLD = cur_clump[col] = cur_clump[col - 1];
+	    }
+
+	    if (diag) {
+		/* check above right, center, left, in that order */
+		temp_clump = prev_clump + col + 1;
+		bcol = col + 1;
+		do {
+		    if (cmp_cells(cur_in, col, prev_in, bcol, rng, nin, thresh2)) {
+			cur_clump[col] = *temp_clump;
+			if (OLD == 0) {
+			    OLD = *temp_clump;
+			}
+			else {
+			    NEW = *temp_clump;
+
+			    /* threshold > 0 and diagonal requires a bit of extra work
+			     * because of bridge cells:
+			     * A similar to B, B similar to C, but A not similar to C
+			     * -> B is bridge cell */
+			    if (NEW != OLD) {
+				CELL *temp_clump2;
+
+				/* conflict! preserve NEW clump ID and change OLD clump ID.
+				 * Must go back to the left in the current row and to the right
+				 * in the previous row to change all the clump values as well.
+				 */
+
+				/* left of the current row from 1 to col - 1 */
+				temp_clump2 = cur_clump;
+				n = col - 1;
+				while (n-- > 0) {
+				    temp_clump2++;	/* skip left edge */
+				    if (*temp_clump2 == OLD)
+					*temp_clump2 = NEW;
+				}
+
+				/* right of previous row from col - 1 to ncols */
+				temp_clump2 = prev_clump + col - 1;
+				n = ncols - col + 2;
+				while (n-- > 0) {
+				    if (*temp_clump2 == OLD)
+					*temp_clump2 = NEW;
+				    temp_clump2++;
+				}
+
+				/* modify the OLD index */
+				index[OLD] = NEW;
+
+				OLD = NEW;
+				NEW = 0;
+			    }
+			}
+		    }
+		    temp_clump--;
+		} while (bcol-- > col - 1);
+	    }
+	    else {
+		/* check above */
+		if (cmp_cells(cur_in, col, prev_in, col, rng, nin, thresh2)) {
+		    temp_clump = prev_clump + col;
+		    cur_clump[col] = *temp_clump;
+		    if (OLD == 0) {
+			OLD = *temp_clump;
+			}
+		    else {
+			NEW = *temp_clump;
+			if (NEW != OLD) {
+
+			    /* conflict! preserve NEW clump ID and change OLD clump ID.
+			     * Must go back to the left in the current row and to the right
+			     * in the previous row to change all the clump values as well.
+			     */
+
+			    /* left of the current row from 1 to col - 1 */
+			    temp_clump = cur_clump;
+			    n = col - 1;
+			    while (n-- > 0) {
+				temp_clump++;	/* skip left edge */
+				if (*temp_clump == OLD)
+				    *temp_clump = NEW;
+			    }
+
+			    /* right of previous row from col + 1 to ncols */
+			    temp_clump = prev_clump + col;
+			    n = ncols - col;
+			    while (n-- > 0) {
+				temp_clump++;	/* skip col */
+				if (*temp_clump == OLD)
+				    *temp_clump = NEW;
+			    }
+
+			    /* modify the OLD index */
+			    index[OLD] = NEW;
+
+			    OLD = NEW;
+			    NEW = 0;
+			}
+		    }
+		}
+	    }
+
+	    if (NEW == 0 || OLD == NEW) {	/* ok */
+		if (OLD == 0) {
+		    /* start a new clump */
+		    label++;
+		    cur_clump[col] = label;
+		    if (label >= nalloc) {
+			nalloc += INCR;
+			index =
+			    (CELL *) G_realloc(index,
+					       nalloc * sizeof(CELL));
+		    }
+		    index[label] = label;
+		}
+	    }
+	    /* else the relabelling above failed */
+	}
+
+	/* write initial clump IDs */
+	/* this works also with writing out cur_clump, but only 
+	 * prev_clump is complete and will not change any more */
+	if (row > 0) {
+	    if (write(cfd, prev_clump + 1, csize) != csize)
+		G_fatal_error(_("Unable to write to temp file"));
+	}
+
+	/* switch the buffers so that the current buffer becomes the previous */
+	temp_in = cur_in;
+	cur_in = prev_in;
+	prev_in = temp_in;
+
+	temp_clump = cur_clump;
+	cur_clump = prev_clump;
+	prev_clump = temp_clump;
+    }
+    /* write last row with initial clump IDs */
+    if (write(cfd, prev_clump + 1, csize) != csize)
+	G_fatal_error(_("Unable to write to temp file"));
+    G_percent(1, 1, 1);
+
+    /* generate a renumbering scheme */
+    G_message(_("Generating renumbering scheme..."));
+    G_debug(1, "%d initial labels", label);
+    /* allocate final clump ID */
+    renumber = (CELL *) G_malloc((label + 1) * sizeof(CELL));
+    renumber[0] = 0;
+    cat = 1;
+    G_percent(0, label, 1);
+    for (n = 1; n <= label; n++) {
+	G_percent(n, label, 1);
+	OLD = n;
+	NEW = index[n];
+	if (OLD != NEW) {
+	    renumber[n] = 0;
+	    /* find valid clump ID */
+	    while (OLD != NEW) {
+		OLD = NEW;
+		NEW = index[OLD];
+		if (NEW == n)
+		    G_fatal_error("Circular relabelling for %d", n);
+	    }
+	    index[n] = NEW;
+	}
+	else
+	    /* set final clump id */
+	    renumber[n] = cat++;
+    }
+    
+    /* rewind temp file */
+    lseek(cfd, 0, SEEK_SET);
+
+    if (print) {
+	fprintf(stdout, "clumps=%d\n", cat - 1);
+    }
+    else {
+	/****************************************************
+	 *                      PASS 2                      *
+	 * apply renumbering scheme to initial clump labels *
+	 ****************************************************/
+
+	/* the input raster is no longer needed, 
+	 * using instead the temp file with initial clump labels */
+
+	G_message(_("Pass 2 of 2..."));
+	for (row = 0; row < nrows; row++) {
+
+	    G_percent(row, nrows, 2);
+	
+	    if (read(cfd, cur_clump, csize) != csize)
+		G_fatal_error(_("Unable to read from temp file"));
+
+	    temp_clump = cur_clump;
+	    out_cellp = out_cell;
+
+	    for (col = 0; col < ncols; col++) {
+		*out_cellp = renumber[index[*temp_clump]];
+		if (*out_cellp == 0)
+		    Rast_set_c_null_value(out_cellp, 1);
+		temp_clump++;
+		out_cellp++;
+	    }
+	    Rast_put_row(out_fd, out_cell, CELL_TYPE);
+	}
+	G_percent(1, 1, 1);
+    }
+
+    close(cfd);
+    unlink(cname);
+
+    print_time(&cur_time);
+
+    return 0;
+}
+
 int print_time(long *start)
 {
     int hours, minutes, seconds;

Modified: grass/trunk/raster/r.clump/local_proto.h
===================================================================
--- grass/trunk/raster/r.clump/local_proto.h	2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/local_proto.h	2016-05-30 22:01:21 UTC (rev 68543)
@@ -21,6 +21,7 @@
 
 /* clump.c */
 CELL clump(int, int, int, int);
+CELL clump_n(int *, char **, int, double, int, int, int);
 int print_time(long *);
 
 /* main.c */

Modified: grass/trunk/raster/r.clump/main.c
===================================================================
--- grass/trunk/raster/r.clump/main.c	2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/main.c	2016-05-30 22:01:21 UTC (rev 68543)
@@ -9,7 +9,7 @@
  * PURPOSE:      Recategorizes data in a raster map layer by grouping cells
  *               that form physically discrete areas into unique categories.
  *
- * COPYRIGHT:    (C) 2006-2014 by the GRASS Development Team
+ * COPYRIGHT:    (C) 2006-2016 by the GRASS Development Team
  *
  *               This program is free software under the GNU General Public
  *               License (>=v2). Read the file COPYING that comes with GRASS
@@ -31,7 +31,9 @@
     struct History hist;
     CELL min, max;
     int range_return, n_clumps;
-    int in_fd, out_fd;
+    int *in_fd, out_fd;
+    int i, n;
+    double threshold;
     char title[512];
     char name[GNAME_MAX];
     char *OUTPUT;
@@ -39,6 +41,7 @@
     struct GModule *module;
     struct Option *opt_in;
     struct Option *opt_out;
+    struct Option *opt_thresh;
     struct Option *opt_title;
     struct Flag *flag_diag;
     struct Flag *flag_print;
@@ -56,7 +59,7 @@
 	_("Recategorizes data in a raster map by grouping cells "
 	  "that form physically discrete areas into unique categories.");
 
-    opt_in = G_define_standard_option(G_OPT_R_INPUT);
+    opt_in = G_define_standard_option(G_OPT_R_INPUTS);
 
     opt_out = G_define_standard_option(G_OPT_R_OUTPUT);
     opt_out->required = NO;
@@ -67,6 +70,14 @@
     opt_title->required = NO;
     opt_title->description = _("Title for output raster map");
 
+    opt_thresh = G_define_option();
+    opt_thresh->key = "threshold";
+    opt_thresh->type = TYPE_DOUBLE;
+    opt_thresh->required = NO;
+    opt_thresh->answer = "0";
+    opt_thresh->label = _("Threshold to identify similar cells");
+    opt_thresh->description = _("Valid range: 0 = identical to < 1 = maximal difference");
+
     flag_diag = G_define_flag();
     flag_diag->key = 'd';
     flag_diag->label = _("Clump also diagonal cells");
@@ -83,19 +94,37 @@
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
-    INPUT = opt_in->answer;
+    threshold = atof(opt_thresh->answer);
+    if (threshold < 0 || threshold >= 1)
+	G_fatal_error(_("Valid range for option <%s> is 0 <= value < 1"),
+	              opt_thresh->key);
+
+    n = 0;
+    while (opt_in->answers[n])
+	n++;
+
+    in_fd = G_malloc(sizeof(int) * n);
+
+    for (i = 0; i < n; i++)
+	in_fd[i] = Rast_open_old(opt_in->answers[i], "");
+
+    INPUT = opt_in->answers[0];
     strcpy(name, INPUT);
 
-    in_fd = Rast_open_old(name, "");
-
+    OUTPUT = NULL;
+    out_fd = -1;
     if (!flag_print->answer) {
 	OUTPUT = opt_out->answer;
 	out_fd = Rast_open_c_new(OUTPUT);
     }
 
-    clump(in_fd, out_fd, flag_diag->answer, flag_print->answer);
+    if (n == 1 && threshold == 0)
+	clump(in_fd[0], out_fd, flag_diag->answer, flag_print->answer);
+    else
+	clump_n(in_fd, opt_in->answers, n, threshold, out_fd, flag_diag->answer, flag_print->answer);
 
-    Rast_close(in_fd);
+    for (i = 0; i < n; i++)
+	Rast_close(in_fd[i]);
 
     if (!flag_print->answer) {
 	Rast_close(out_fd);

Modified: grass/trunk/raster/r.clump/r.clump.html
===================================================================
--- grass/trunk/raster/r.clump/r.clump.html	2016-05-30 21:09:41 UTC (rev 68542)
+++ grass/trunk/raster/r.clump/r.clump.html	2016-05-30 22:01:21 UTC (rev 68543)
@@ -1,9 +1,9 @@
 <h2>DESCRIPTION</h2>
 
-<em>r.clump</em> finds all areas of contiguous cell category values in
-the input raster map. NULL values in the input are ignored. It assigns 
-a unique category value to each such area ("clump") in the 
-resulting output raster map.
+<em>r.clump</em> finds all areas of contiguous cell category values 
+(connected components) in the input raster map. NULL values in the 
+input are ignored. It assigns a unique category value to each such area 
+("clump") in the resulting output raster map.
 
 <p>
 Category distinctions in the input raster map are preserved.  This
@@ -13,6 +13,25 @@
 to <em>r.clump</em> to recategorize cells and reassign cell category
 values.
 
+<p>
+<em>r.clump</em> can also perform "fuzzy" clumping where 
+neighboring cells that are not identical but similar to each other are 
+clumped together. Here, the spectral distance between two cells is 
+scaled to the range [0, 1] and compared to the <em>threshold</em> 
+value. Cells are clumped together if their spectral distance is ≤ 
+<em>threshold</em>. The result is very sensitive to this 
+<em>threshold</em> value, a recommended start value is 0.01, then 
+increasing or decreasing this value according to the desired output.
+
+<p>
+<em>r.clump</em> can also use multiple raster maps of any kind (CELL, 
+FCELL, DCELL) as input. In this case, the spectral distance between 
+cells is used to determine the similarity of two cells. This means that 
+input maps must be metric: the difference cell 1 - cell 2 must make 
+sense. Categorical maps, e.g. land cover, can not be used in this case. 
+Examples for valid inpat maps are satellite imagery, vegetation 
+indices, elevation, climatic parameters etc.
+
 <h2>NOTES</h2>
 
 By default, the resulting clumps are connected only by their four 
@@ -50,6 +69,13 @@
 r.report lakes_individual units=h
 </pre></div>
 
+<p>
+Perform fuzzy clumping on Landsat 7 2002 imagery (North Carolina sample dataset)
+<div class="code"><pre>
+r.clump in=lsat7_2002_10,lsat7_2002_20,lsat7_2002_30,lsat7_2002_40,lsat7_2002_50,lsat7_2002_70 \
+        out=lsat7_2002_clump_4 threshold=0.045
+</pre></div>
+
 <h2>SEE ALSO</h2>
 
 <em>



More information about the grass-commit mailing list