[GRASS-SVN] r68968 - grass/trunk/raster/r.clump

svn_grass at osgeo.org svn_grass at osgeo.org
Thu Jul 14 13:48:02 PDT 2016


Author: mmetz
Date: 2016-07-14 13:48:02 -0700 (Thu, 14 Jul 2016)
New Revision: 68968

Modified:
   grass/trunk/raster/r.clump/Makefile
   grass/trunk/raster/r.clump/clump.c
   grass/trunk/raster/r.clump/local_proto.h
   grass/trunk/raster/r.clump/main.c
Log:
r.clump: +minimum size of clumps

Modified: grass/trunk/raster/r.clump/Makefile
===================================================================
--- grass/trunk/raster/r.clump/Makefile	2016-07-14 19:56:35 UTC (rev 68967)
+++ grass/trunk/raster/r.clump/Makefile	2016-07-14 20:48:02 UTC (rev 68968)
@@ -2,7 +2,7 @@
 
 PGM = r.clump
 
-LIBES = $(RASTERLIB) $(GISLIB)
+LIBES = $(RASTERLIB) $(GISLIB) $(BTREE2LIB)
 DEPENDENCIES = $(RASTERDEP) $(GISDEP)
 
 include $(MODULE_TOPDIR)/include/Make/Module.make

Modified: grass/trunk/raster/r.clump/clump.c
===================================================================
--- grass/trunk/raster/r.clump/clump.c	2016-07-14 19:56:35 UTC (rev 68967)
+++ grass/trunk/raster/r.clump/clump.c	2016-07-14 20:48:02 UTC (rev 68968)
@@ -29,16 +29,159 @@
 
 #define INCR 1024
 
-CELL clump(int in_fd, int out_fd, int diag, int print)
+static CELL do_renumber(int *in_fd, DCELL *rng, int nin,
+                        int diag, int minsize, 
+			int cfd, CELL label, CELL *index, int out_fd)
 {
+    int row, col, nrows, ncols;
+    int n;
+    CELL OLD, NEW;
+    CELL *temp_cell, *temp_clump;
+    CELL *cur_clump, *out_cell;
+    CELL *clumpid;
+    CELL cat;
+    int csize;
+
+    nrows = Rast_window_rows();
+    ncols = Rast_window_cols();
+
+    csize = ncols * sizeof(CELL);
+
+    /* generate a renumbering scheme */
+    G_message(_("Generating renumbering scheme..."));
+    G_debug(1, "%d initial labels", label);
+    /* allocate final clump ID */
+    clumpid = (CELL *) G_malloc((label + 1) * sizeof(CELL));
+    clumpid[0] = 0;
+    cat = 0;
+    G_percent(0, label, 1);
+    for (n = 1; n <= label; n++) {
+	G_percent(n, label, 1);
+	OLD = n;
+	NEW = index[n];
+	if (OLD != NEW) {
+	    clumpid[n] = 0;
+	    /* find valid clump ID */
+	    while (OLD != NEW) {
+		OLD = NEW;
+		NEW = index[OLD];
+	    }
+	    index[n] = NEW;
+	}
+	else
+	    /* set final clump id */
+	    clumpid[n] = ++cat;
+    }
+
+    /****************************************************
+     *                      PASS 2                      *
+     * apply renumbering scheme to initial clump labels *
+     ****************************************************/
+
+    G_message(_("Pass 2 of 2..."));
+
+    if (minsize > 1) {
+	int do_write;
+	off_t coffset;
+	CELL new_clump;
+
+	cur_clump = Rast_allocate_c_buf();
+
+	for (row = 0; row < nrows; row++) {
+
+	    G_percent(row, nrows, 2);
+
+	    coffset = (off_t)row * csize;
+	    lseek(cfd, coffset, SEEK_SET);
+	    if (read(cfd, cur_clump, csize) != csize)
+		G_fatal_error(_("Unable to read from temp file"));
+
+	    temp_clump = cur_clump;
+
+	    do_write = 0;
+	    for (col = 0; col < ncols; col++) {
+		new_clump = clumpid[index[*temp_clump]];
+		if (*temp_clump != new_clump) {
+		    *temp_clump = new_clump;
+		    do_write = 1;
+		}
+		temp_clump++;
+	    }
+	    if (do_write) {
+		lseek(cfd, coffset, SEEK_SET);
+		if (write(cfd, cur_clump, csize) != csize)
+		    G_fatal_error(_("Unable to write to temp file"));
+	    }
+	}
+	G_percent(1, 1, 1);
+
+	G_free(cur_clump);
+	G_free(index);
+	G_free(clumpid);
+
+	G_message(_("%d initial clumps"), cat);
+
+	return merge_small_clumps(in_fd, nin, rng,
+                        diag, minsize, &cat, 
+			cfd, out_fd);
+    }
+
+    if (out_fd < 0) {
+	fprintf(stdout, "clumps=%d\n", cat);
+	
+	return cat;
+    }
+
+    /* the input raster is no longer needed, 
+     * using instead the temp file with initial clump labels */
+
+    /* rewind temp file */
+    lseek(cfd, 0, SEEK_SET);
+
+    cur_clump = Rast_allocate_c_buf();
+    out_cell = Rast_allocate_c_buf();
+
+    G_message(_("Pass 2 of 2..."));
+    for (row = 0; row < nrows; row++) {
+
+	G_percent(row, nrows, 2);
+
+	if (read(cfd, cur_clump, csize) != csize)
+	    G_fatal_error(_("Unable to read from temp file"));
+
+	temp_clump = cur_clump;
+	temp_cell = out_cell;
+
+	for (col = 0; col < ncols; col++) {
+	    *temp_cell = clumpid[index[*temp_clump]];
+	    if (*temp_cell == 0) {
+		Rast_set_c_null_value(temp_cell, 1);
+	    }
+	    temp_clump++;
+	    temp_cell++;
+	}
+	Rast_put_row(out_fd, out_cell, CELL_TYPE);
+    }
+    G_percent(1, 1, 1);
+
+    G_free(cur_clump);
+    G_free(out_cell);
+    G_free(index);
+    G_free(clumpid);
+
+    return cat;
+}
+
+CELL clump(int *in_fd, int out_fd, int diag, int minsize)
+{
     register int col;
     register int n;
     CELL NEW, OLD;
     CELL *temp_cell, *temp_clump;
-    CELL *prev_in, *cur_in, *out_cell;
+    CELL *prev_in, *cur_in;
     CELL *prev_clump, *cur_clump;
     CELL X, LEFT;
-    CELL *index, *renumber;
+    CELL *index;
     CELL label;
     int nrows, ncols;
     int row;
@@ -47,7 +190,6 @@
     long cur_time;
     char *cname;
     int cfd, csize;
-    CELL cat;
 
     nrows = Rast_window_rows();
     ncols = Rast_window_cols();
@@ -56,7 +198,6 @@
     nalloc = INCR;
     index = (CELL *) G_malloc(nalloc * sizeof(CELL));
     index[0] = 0;
-    renumber = NULL;
 
     /* allocate CELL buffers two columns larger than current window */
     len = (ncols + 2) * sizeof(CELL);
@@ -64,7 +205,6 @@
     cur_in = (CELL *) G_malloc(len);
     prev_clump = (CELL *) G_malloc(len);
     cur_clump = (CELL *) G_malloc(len);
-    out_cell = (CELL *) G_malloc(len);
 
     /* temp file for initial clump IDs */
     cname = G_tempfile();
@@ -93,7 +233,7 @@
 
     G_message(_("Pass 1 of 2..."));
     for (row = 0; row < nrows; row++) {
-	Rast_get_c_row(in_fd, cur_in + 1, row);
+	Rast_get_c_row(*in_fd, cur_in + 1, row);
 
 	G_percent(row, nrows, 2);
 	Rast_set_c_null_value(&X, 1);
@@ -220,70 +360,15 @@
 	G_fatal_error(_("Unable to write to temp file"));
     G_percent(1, 1, 1);
 
-    /* generate a renumbering scheme */
-    G_message(_("Generating renumbering scheme..."));
-    G_debug(1, "%d initial labels", label);
-    /* allocate final clump ID */
-    renumber = (CELL *) G_malloc((label + 1) * sizeof(CELL));
-    renumber[0] = 0;
-    cat = 1;
-    G_percent(0, label, 1);
-    for (n = 1; n <= label; n++) {
-	G_percent(n, label, 1);
-	OLD = n;
-	NEW = index[n];
-	if (OLD != NEW) {
-	    renumber[n] = 0;
-	    /* find valid clump ID */
-	    while (OLD != NEW) {
-		OLD = NEW;
-		NEW = index[OLD];
-	    }
-	    index[n] = NEW;
-	}
-	else
-	    /* set final clump id */
-	    renumber[n] = cat++;
-    }
-    
-    /* rewind temp file */
-    lseek(cfd, 0, SEEK_SET);
+    /* free */
+    G_free(prev_clump);
+    G_free(cur_clump);
 
-    if (print) {
-	fprintf(stdout, "clumps=%d\n", cat - 1);
-    }
-    else {
-	/****************************************************
-	 *                      PASS 2                      *
-	 * apply renumbering scheme to initial clump labels *
-	 ****************************************************/
+    G_free(prev_in);
+    G_free(cur_in);
 
-	/* the input raster is no longer needed, 
-	 * using instead the temp file with initial clump labels */
+    do_renumber(in_fd, NULL, 1, diag, minsize, cfd, label, index, out_fd);
 
-	G_message(_("Pass 2 of 2..."));
-	for (row = 0; row < nrows; row++) {
-
-	    G_percent(row, nrows, 2);
-	
-	    if (read(cfd, cur_clump, csize) != csize)
-		G_fatal_error(_("Unable to read from temp file"));
-
-	    temp_clump = cur_clump;
-	    temp_cell = out_cell;
-
-	    for (col = 0; col < ncols; col++) {
-		*temp_cell = renumber[index[*temp_clump]];
-		if (*temp_cell == 0)
-		    Rast_set_c_null_value(temp_cell, 1);
-		temp_clump++;
-		temp_cell++;
-	    }
-	    Rast_put_row(out_fd, out_cell, CELL_TYPE);
-	}
-	G_percent(1, 1, 1);
-    }
-
     close(cfd);
     unlink(cname);
 
@@ -292,8 +377,7 @@
     return 0;
 }
 
-static int cmp_cells(DCELL **a, int acol, DCELL **b, int bcol,
-                     DCELL *rng, int n, double threshold2)
+static double get_diff2(DCELL **a, int acol, DCELL **b, int bcol, DCELL *rng, int n)
 {
     int i;
     double diff, diff2;
@@ -301,7 +385,7 @@
     diff2 = 0;
     for (i = 0; i < n; i++) {
 	if (Rast_is_d_null_value(&b[i][bcol]))
-	    return 0;
+	    return 2;
 	diff = a[i][acol] - b[i][bcol];
 	/* normalize with the band's range */
 	if (rng[i])
@@ -311,10 +395,11 @@
     /* normalize difference to the range [0, 1] */
     diff2 /= n;
     
-    return (diff2 <= threshold2);
+    return diff2;
 }
 
-CELL clump_n(int *in_fd, char **inname, int nin, double threshold, int out_fd, int diag, int print)
+CELL clump_n(int *in_fd, char **inname, int nin, double threshold,
+             int out_fd, int diag, int minsize)
 {
     register int col;
     register int i, n;
@@ -325,9 +410,9 @@
     double thresh2;
     /* output */
     CELL OLD, NEW;
-    CELL *temp_clump, *out_cell, *out_cellp;
+    CELL *temp_clump;
     CELL *prev_clump, *cur_clump;
-    CELL *index, *renumber;
+    CELL *index;
     CELL label;
     int nrows, ncols;
     int row;
@@ -337,7 +422,6 @@
     long cur_time;
     char *cname;
     int cfd, csize;
-    CELL cat;
 
     G_message(_("%d-band clumping with threshold %g"), nin, threshold);
 
@@ -350,7 +434,6 @@
     nalloc = INCR;
     index = (CELL *) G_malloc(nalloc * sizeof(CELL));
     index[0] = 0;
-    renumber = NULL;
 
     /* allocate DCELL buffers two columns larger than current window */
     len = (ncols + 2) * sizeof(DCELL);
@@ -386,7 +469,6 @@
     len = (ncols + 2) * sizeof(CELL);
     prev_clump = (CELL *) G_malloc(len);
     cur_clump = (CELL *) G_malloc(len);
-    out_cell = (CELL *) G_malloc(len);
 
     /* temp file for initial clump IDs */
     cname = G_tempfile();
@@ -437,7 +519,7 @@
 	    /* try to connect the current cell to an existing clump */
 	    OLD = NEW = 0;
 	    /* same clump as to the left */
-	    if (cmp_cells(cur_in, col, cur_in, col - 1, rng, nin, thresh2)) {
+	    if (get_diff2(cur_in, col, cur_in, col - 1, rng, nin) <= thresh2) {
 		OLD = cur_clump[col] = cur_clump[col - 1];
 	    }
 
@@ -446,7 +528,7 @@
 		temp_clump = prev_clump + col + 1;
 		bcol = col + 1;
 		do {
-		    if (cmp_cells(cur_in, col, prev_in, bcol, rng, nin, thresh2)) {
+		    if (get_diff2(cur_in, col, prev_in, bcol, rng, nin) <= thresh2) {
 			cur_clump[col] = *temp_clump;
 			if (OLD == 0) {
 			    OLD = *temp_clump;
@@ -497,7 +579,7 @@
 	    }
 	    else {
 		/* check above */
-		if (cmp_cells(cur_in, col, prev_in, col, rng, nin, thresh2)) {
+		if (get_diff2(cur_in, col, prev_in, col, rng, nin) <= thresh2) {
 		    temp_clump = prev_clump + col;
 		    cur_clump[col] = *temp_clump;
 		    if (OLD == 0) {
@@ -579,72 +661,19 @@
 	G_fatal_error(_("Unable to write to temp file"));
     G_percent(1, 1, 1);
 
-    /* generate a renumbering scheme */
-    G_message(_("Generating renumbering scheme..."));
-    G_debug(1, "%d initial labels", label);
-    /* allocate final clump ID */
-    renumber = (CELL *) G_malloc((label + 1) * sizeof(CELL));
-    renumber[0] = 0;
-    cat = 1;
-    G_percent(0, label, 1);
-    for (n = 1; n <= label; n++) {
-	G_percent(n, label, 1);
-	OLD = n;
-	NEW = index[n];
-	if (OLD != NEW) {
-	    renumber[n] = 0;
-	    /* find valid clump ID */
-	    while (OLD != NEW) {
-		OLD = NEW;
-		NEW = index[OLD];
-		if (NEW == n)
-		    G_fatal_error("Circular relabelling for %d", n);
-	    }
-	    index[n] = NEW;
-	}
-	else
-	    /* set final clump id */
-	    renumber[n] = cat++;
-    }
-    
-    /* rewind temp file */
-    lseek(cfd, 0, SEEK_SET);
+    /* free */
+    G_free(prev_clump);
+    G_free(cur_clump);
 
-    if (print) {
-	fprintf(stdout, "clumps=%d\n", cat - 1);
+    for (i = 0; i < nin; i++) {
+	G_free(prev_in[i]);
+	G_free(cur_in[i]);
     }
-    else {
-	/****************************************************
-	 *                      PASS 2                      *
-	 * apply renumbering scheme to initial clump labels *
-	 ****************************************************/
+    G_free(prev_in);
+    G_free(cur_in);
 
-	/* the input raster is no longer needed, 
-	 * using instead the temp file with initial clump labels */
+    do_renumber(in_fd, rng, nin, diag, minsize, cfd, label, index, out_fd);
 
-	G_message(_("Pass 2 of 2..."));
-	for (row = 0; row < nrows; row++) {
-
-	    G_percent(row, nrows, 2);
-	
-	    if (read(cfd, cur_clump, csize) != csize)
-		G_fatal_error(_("Unable to read from temp file"));
-
-	    temp_clump = cur_clump;
-	    out_cellp = out_cell;
-
-	    for (col = 0; col < ncols; col++) {
-		*out_cellp = renumber[index[*temp_clump]];
-		if (*out_cellp == 0)
-		    Rast_set_c_null_value(out_cellp, 1);
-		temp_clump++;
-		out_cellp++;
-	    }
-	    Rast_put_row(out_fd, out_cell, CELL_TYPE);
-	}
-	G_percent(1, 1, 1);
-    }
-
     close(cfd);
     unlink(cname);
 

Modified: grass/trunk/raster/r.clump/local_proto.h
===================================================================
--- grass/trunk/raster/r.clump/local_proto.h	2016-07-14 19:56:35 UTC (rev 68967)
+++ grass/trunk/raster/r.clump/local_proto.h	2016-07-14 20:48:02 UTC (rev 68968)
@@ -20,11 +20,13 @@
 #define __LOCAL_PROTO_H__
 
 /* clump.c */
-CELL clump(int, int, int, int);
+CELL clump(int *, int, int, int);
 CELL clump_n(int *, char **, int, double, int, int, int);
 int print_time(long *);
 
-/* main.c */
-int main(int, char *[]);
+/* minsize.c */
+int merge_small_clumps(int *in_fd, int nin, DCELL *rng,
+                        int diag, int min_size, int *n_clumps,
+			int cfd, int out_fd);
 
 #endif /* __LOCAL_PROTO_H__ */

Modified: grass/trunk/raster/r.clump/main.c
===================================================================
--- grass/trunk/raster/r.clump/main.c	2016-07-14 19:56:35 UTC (rev 68967)
+++ grass/trunk/raster/r.clump/main.c	2016-07-14 20:48:02 UTC (rev 68968)
@@ -19,6 +19,8 @@
 
 #include <stdlib.h>
 #include <string.h>
+#include <sys/types.h>
+#include <inttypes.h>
 #include <grass/gis.h>
 #include <grass/raster.h>
 #include <grass/glocale.h>
@@ -34,6 +36,7 @@
     int *in_fd, out_fd;
     int i, n;
     double threshold;
+    int minsize;
     char title[512];
     char name[GNAME_MAX];
     char *OUTPUT;
@@ -42,6 +45,7 @@
     struct Option *opt_in;
     struct Option *opt_out;
     struct Option *opt_thresh;
+    struct Option *opt_minsize;
     struct Option *opt_title;
     struct Flag *flag_diag;
     struct Flag *flag_print;
@@ -78,6 +82,14 @@
     opt_thresh->label = _("Threshold to identify similar cells");
     opt_thresh->description = _("Valid range: 0 = identical to < 1 = maximal difference");
 
+    opt_minsize = G_define_option();
+    opt_minsize->key = "minsize";
+    opt_minsize->type = TYPE_INTEGER;
+    opt_minsize->required = NO;
+    opt_minsize->answer = "1";
+    opt_minsize->label = _("Minimum clump size in cells");
+    opt_minsize->description = _("Clumps smaller than minsize will be merged to form larger clumps");
+
     flag_diag = G_define_flag();
     flag_diag->key = 'd';
     flag_diag->label = _("Clump also diagonal cells");
@@ -94,11 +106,24 @@
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
+#if defined(int64_t)
+    G_message("have int64_t");
+#endif
+#if defined(_int64_t)
+    G_message("have _int64_t");
+#endif
+#if defined(__int64_t)
+    G_message("have __int64_t");
+#endif
+
+
     threshold = atof(opt_thresh->answer);
     if (threshold < 0 || threshold >= 1)
 	G_fatal_error(_("Valid range for option <%s> is 0 <= value < 1"),
 	              opt_thresh->key);
 
+    minsize = atoi(opt_minsize->answer);
+
     n = 0;
     while (opt_in->answers[n])
 	n++;
@@ -119,9 +144,10 @@
     }
 
     if (n == 1 && threshold == 0)
-	clump(in_fd[0], out_fd, flag_diag->answer, flag_print->answer);
+	clump(in_fd, out_fd, flag_diag->answer, minsize);
     else
-	clump_n(in_fd, opt_in->answers, n, threshold, out_fd, flag_diag->answer, flag_print->answer);
+	clump_n(in_fd, opt_in->answers, n, threshold, out_fd,
+	        flag_diag->answer, minsize);
 
     for (i = 0; i < n; i++)
 	Rast_close(in_fd[i]);



More information about the grass-commit mailing list