[GRASS-SVN] r63952 - in grass/trunk/vector: . v.cluster

svn_grass at osgeo.org svn_grass at osgeo.org
Sun Jan 4 11:36:40 PST 2015


Author: mmetz
Date: 2015-01-04 11:36:39 -0800 (Sun, 04 Jan 2015)
New Revision: 63952

Added:
   grass/trunk/vector/v.cluster/
   grass/trunk/vector/v.cluster/v.cluster.html
Removed:
   grass/trunk/vector/v.cluster/v.example.html
Modified:
   grass/trunk/vector/Makefile
   grass/trunk/vector/v.cluster/Makefile
   grass/trunk/vector/v.cluster/main.c
Log:
add v.cluster: identify clusters in a point cloud

Modified: grass/trunk/vector/Makefile
===================================================================
--- grass/trunk/vector/Makefile	2015-01-04 19:32:25 UTC (rev 63951)
+++ grass/trunk/vector/Makefile	2015-01-04 19:36:39 UTC (rev 63952)
@@ -10,6 +10,7 @@
 	v.category \
 	v.class \
 	v.clean \
+	v.cluster \
 	v.colors \
 	v.colors.out \
 	v.db.connect \

Modified: grass/trunk/vector/v.cluster/Makefile
===================================================================
--- grass/trunk/doc/vector/v.example/Makefile	2014-12-16 12:11:56 UTC (rev 63571)
+++ grass/trunk/vector/v.cluster/Makefile	2015-01-04 19:36:39 UTC (rev 63952)
@@ -1,11 +1,9 @@
-# fix this relative to include/
-# or use absolute path to the GRASS source code
-MODULE_TOPDIR = ../../..
+MODULE_TOPDIR = ../..
 
-PGM = v.example
+PGM = v.cluster
 
-LIBES = $(VECTORLIB) $(DBMILIB) $(GISLIB)
-DEPENDENCIES = $(VECTORDEP) $(DBMIDEP) $(GISDEP)
+LIBES = $(VECTORLIB) $(BTREE2LIB) $(GISLIB)
+DEPENDENCIES = $(VECTORDEP) $(BTREE2DEP) $(GISDEP)
 EXTRA_INC = $(VECT_INC)
 EXTRA_CFLAGS = $(VECT_CFLAGS)
 

Modified: grass/trunk/vector/v.cluster/main.c
===================================================================
--- grass/trunk/doc/vector/v.example/main.c	2014-12-16 12:11:56 UTC (rev 63571)
+++ grass/trunk/vector/v.cluster/main.c	2015-01-04 19:36:39 UTC (rev 63952)
@@ -1,14 +1,13 @@
 
 /****************************************************************
  *
- * MODULE:     v.example
+ * MODULE:     v.cluster
  *
- * AUTHOR(S):  GRASS Development Team, Radim Blazek, Maris Nartiss
+ * AUTHOR(S):  Markus Metz
  *
- * PURPOSE:    copies vector data from source map to destination map
- *             prints out all point coordinates and atributes
+ * PURPOSE:    Identifies clusters in a point cloud
  *
- * COPYRIGHT:  (C) 2002-2009 by the GRASS Development Team
+ * COPYRIGHT:  (C) 2015 by the GRASS Development Team
  *
  *             This program is free software under the
  *             GNU General Public License (>=v2).
@@ -21,25 +20,30 @@
 #include <stdlib.h>
 #include <grass/gis.h>
 #include <grass/vector.h>
-#include <grass/dbmi.h>
 #include <grass/glocale.h>
+#include <grass/kdtree.h>
 
 int main(int argc, char *argv[])
 {
     struct Map_info In, Out;
     static struct line_pnts *Points;
     struct line_cats *Cats;
-    int i, type, cat, ncols, nrows, col, more, open3d;
-    char *mapset, sql[200];
-    struct GModule *module;	/* GRASS module for parsing arguments */
-    struct Option *old, *new;
-    dbDriver *driver;
-    dbHandle handle;
-    dbCursor cursor;
-    dbTable *table;
-    dbColumn *column;
-    dbString table_name, dbsql, valstr;
-    struct field_info *Fi, *Fin;
+    int i, j, type, cat, is3d;
+    struct GModule *module;
+    struct Option *input, *output, *dist_opt, *min_opt, *lyr_opt;
+    struct Flag *flag_2d, *flag_topo, *flag_attr;
+    int clayer;
+    int npoints, nlines;
+    int *cid, *idx, *renumber, OLD, NEW;
+    int nclusters;
+    struct kdtree *kdt;
+    struct kdtrav trav;
+    double c[3];
+    int uid;
+    double eps;
+    int ndims, minpnts;
+    double *kddist;
+    int kdfound, *kduid;
 
     /* initialize GIS environment */
     /* reads grass env, stores program name to G_program_name() */
@@ -48,179 +52,313 @@
     /* initialize module */
     module = G_define_module();
     G_add_keyword(_("vector"));
-    G_add_keyword(_("keyword2"));
-    G_add_keyword(_("keyword3"));
-    module->description = _("My first vector module");
+    G_add_keyword(_("point cloud"));
+    G_add_keyword(_("cluster"));
+    G_add_keyword(_("clump"));
+    module->description = _("Cluster identification");
 
     /* Define the different options as defined in gis.h */
-    old = G_define_standard_option(G_OPT_V_INPUT);
+    input = G_define_standard_option(G_OPT_V_INPUT);
 
-    new = G_define_standard_option(G_OPT_V_OUTPUT);
+    output = G_define_standard_option(G_OPT_V_OUTPUT);
 
+    lyr_opt = G_define_standard_option(G_OPT_V_FIELD);
+    lyr_opt->label = _("Layer number or name for cluster ids");
+    lyr_opt->answer = "2";
+
+    dist_opt = G_define_option();
+    dist_opt->type = TYPE_DOUBLE;
+    dist_opt->key = "distance";
+    dist_opt->required = NO;
+    dist_opt->label = _("Maximum distance to neighbors");
+
+    min_opt = G_define_option();
+    min_opt->type = TYPE_INTEGER;
+    min_opt->key = "min";
+    min_opt->required = NO;
+    min_opt->label = _("Minimum number of points to create a cluster");
+
+    flag_2d = G_define_flag();
+    flag_2d->key = '2';
+    flag_2d->label = _("Force 2D clustering");
+
+    flag_topo = G_define_standard_flag(G_FLG_V_TOPO);
+
+    flag_attr = G_define_standard_flag(G_FLG_V_TABLE);
+    
+
     /* options and flags parser */
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
-    /* Create and initialize struct's where to store points/lines and categories */
     Points = Vect_new_line_struct();
     Cats = Vect_new_cats_struct();
 
-    /* Check 1) output is legal vector name; 2) if can find input map; 
-       3) if input was found in current mapset, check if input != output.
-       lib/vector/Vlib/legal_vname.c
-     */
-    Vect_check_input_output_name(old->answer, new->answer, G_FATAL_EXIT);
+    Vect_check_input_output_name(input->answer, output->answer, G_FATAL_EXIT);
 
-    if ((mapset = (char *)G_find_vector2(old->answer, "")) == NULL)
-	G_fatal_error(_("Vector map <%s> not found"), old->answer);
-
-    /* Predetermine level at which a map will be opened for reading 
-       lib/vector/Vlib/open.c
-     */
-    if (Vect_set_open_level(2))
+    if (Vect_set_open_level(1))
 	G_fatal_error(_("Unable to set predetermined vector open level"));
 
-    /* Open existing vector for reading
-       lib/vector/Vlib/open.c
-     */
-    if (1 > Vect_open_old(&In, old->answer, mapset))
-	G_fatal_error(_("Unable to open vector map <%s>"), old->answer);
+    if (1 > Vect_open_old(&In, input->answer, NULL))
+	G_fatal_error(_("Unable to open vector map <%s>"), input->answer);
 
     /* Check if old vector is 3D. We should preserve 3D data. */
-    if (Vect_is_3d(&In))
-	open3d = WITH_Z;
-    else
-	open3d = WITHOUT_Z;
-
-    /* Open new vector for reading/writing */
-    if (0 > Vect_open_new(&Out, new->answer, open3d)) {
-	Vect_close(&In);
-	G_fatal_error(_("Unable to create vector map <%s>"), new->answer);
+    is3d = WITHOUT_Z;
+    ndims = 2;
+    if (Vect_is_3d(&In)) {
+	is3d = WITH_Z;
+	ndims = 3;
     }
 
-    /* Let's get vector layers db connections information */
-    Fi = Vect_get_field(&In, 1);
-    if (!Fi) {
-	Vect_close(&In);
-	G_fatal_error(_("Database connection not defined for layer %d"), 1);
+    if (flag_2d->answer)
+	ndims = 2;
+
+    minpnts = ndims;
+
+    if (min_opt->answer) {
+	minpnts = atoi(min_opt->answer);
+	if (minpnts < 2) {
+	    G_warning(_("Minimum number of points must be at least 2"));
+	    minpnts = 2;
+	}
+	minpnts--;
     }
 
-    /* Output information useful for debuging 
-       include/vect/dig_structs.h
-     */
-    G_debug(1,
-	    "Field number:%d; Name:<%s>; Driver:<%s>; Database:<%s>; Table:<%s>; Key:<%s>;\n",
-	    Fi->number, Fi->name, Fi->driver, Fi->database, Fi->table,
-	    Fi->key);
+    clayer = atoi(lyr_opt->answer);
 
-    /* Prepeare strings for use in db_* calls */
-    db_init_string(&dbsql);
-    db_init_string(&valstr);
-    db_init_string(&table_name);
-    db_init_handle(&handle);
+    /* count points */
+    G_message(_("Counting input points ..."));
+    npoints = nlines = 0;
+    while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
+	nlines++;
+	if (type == GV_POINT)
+	    npoints++;
+    }
 
-    /* Prepare database for use */
-    driver = db_start_driver(Fi->driver);
-    if (driver == NULL) {
+    if (npoints == 0) {
+	G_warning(_("No points in input, nothing to do"));
 	Vect_close(&In);
-	G_fatal_error(_("Unable to start driver <%s>"), Fi->driver);
+	exit(EXIT_SUCCESS);
     }
-    db_set_handle(&handle, Fi->database, NULL);
-    if (db_open_database(driver, &handle) != DB_OK) {
+
+    /* Open new vector for reading/writing */
+    if (0 > Vect_open_new(&Out, output->answer, is3d)) {
 	Vect_close(&In);
-	G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
-		      Fi->database, driver);
+	G_fatal_error(_("Unable to create vector map <%s>"), output->answer);
     }
-    db_set_string(&table_name, Fi->table);
-    if (db_describe_table(driver, &table_name, &table) != DB_OK) {
-	Vect_close(&In);
-	G_fatal_error(_("Unable to describe table <%s>"), Fi->table);
-    }
-    ncols = db_get_table_number_of_columns(table);
 
     /* Copy header and history data from old to new map */
     Vect_copy_head_data(&In, &Out);
     Vect_hist_copy(&In, &Out);
     Vect_hist_command(&Out);
 
-    i = 1;
-    /* Let's do something with every vector feature in input map... */
-    /* Read in single line and get it's type */
+    /* create k-d tree */
+    G_message(_("Creating search index ..."));
+    kdt = kdtree_create(ndims, NULL);
+    cid = G_malloc((nlines + 1) * sizeof(int));
+    idx = G_malloc((nlines + 1) * sizeof(int));
+    Vect_rewind(&In);
+    i = 0;
+    cid[0] = 0;
     while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
-	/* If Points contain line... */
-	if (type == GV_LINE || type == GV_POINT || type == GV_CENTROID) {
-	    if (Vect_cat_get(Cats, 1, &cat) == 0) {
-		Vect_cat_set(Cats, 1, i);
-		i++;
+	G_percent(i++, nlines, 4);
+	cid[i] = 0;
+	if (type == GV_POINT) {
+	    
+	    c[0] = Points->x[0];
+	    c[1] = Points->y[0];
+	    c[2] = Points->z[0];
+
+	    kdtree_insert(kdt, c, i, 0);
+	}
+    }
+    G_percent(nlines, nlines, 4);
+
+    kdtree_optimize(kdt, 2);
+
+    /* get epsilon */
+    if (dist_opt->answer) {
+	eps = atof(dist_opt->answer);
+	if (eps <= 0)
+	    G_fatal_error(_("Option %s must be a positive number"), dist_opt->key);
+    }
+    else {
+	int n;
+	double dist, mean, min, max, sum, sumsq, sd;
+	double *kd;
+	int *ki;
+
+	/* estimate epsilon */
+	G_message(_("Estimating maximum distance ..."));
+	kdtree_init_trav(&trav, kdt);
+	c[2] = 0.0;
+	n = 0;
+	sum = sumsq = 0;
+	min = 1.0 / 0.0;
+	max = 0;
+	kd = G_malloc(minpnts * sizeof(double));
+	ki = G_malloc(minpnts * sizeof(int));
+	i = 0;
+	while (kdtree_traverse(&trav, c, &uid)) {
+	    G_percent(i++, npoints, 4);
+
+	    kdfound = kdtree_knn(kdt, c, ki, kd, minpnts, &uid);
+	    if (kdfound) {
+		dist = sqrt(kd[kdfound - 1]);
+		sum += dist;
+		sumsq += dist * dist;
+		n++;
+		if (min > dist)
+		    min = dist;
+		if (max < dist)
+		    max = dist;
 	    }
 	}
-	if (type == GV_POINT) {
-	    /* Print out point coordinates */
-	    printf("No:%d\tX:%f\tY:%f\tZ:%f\tCAT:%d\n", i, *Points->x,
-		   *Points->y, *Points->z, cat);
+	G_percent(npoints, npoints, 4);
 
-	    /* Prepeare SQL query to get point attribute data */
-	    sprintf(sql, "select * from %s where %s=%d", Fi->table, Fi->key,
-		    cat);
-	    G_debug(1, "SQL: \"%s\"", sql);
-	    db_set_string(&dbsql, sql);
-	    /* Now execute query */
-	    if (db_open_select_cursor(driver, &dbsql, &cursor, DB_SEQUENTIAL)
-		!= DB_OK)
-		G_warning(_("Unable to get attribute data for cat %d"), cat);
+	G_free(kd);
+	G_free(ki);
+
+	if (!n)
+	    G_fatal_error(_("No neighbors found"));
+	
+	mean = sum / n;
+	sd = sqrt(sumsq / n - mean * mean);
+	eps = mean + 1.644854 * sd; /* 90% CI */
+	eps = mean + 2.575829 * sd; /* 99% CI */
+	
+	if (eps > max)
+	    eps = max;
+
+	G_message(_("Distance to the %d nearest neighbor:"), minpnts);
+	G_message(_("Min: %g, max: %g"), min, max);
+	G_message(_("Mean: %g"), mean);
+	G_message(_("Standard deviation: %g"), sd);
+
+	G_message(_("Estimated maximum distance: %g"), eps);
+    }
+
+    /* create clusters */
+    G_message(_("Building clusters ..."));
+    nclusters = 0;
+    kdtree_init_trav(&trav, kdt);
+    c[2] = 0.0;
+    idx[0] = 0;
+    i = 0;
+    while (kdtree_traverse(&trav, c, &uid)) {
+	G_percent(i++, npoints, 4);
+
+	/* radius search */
+	kdfound = kdtree_dnn(kdt, c, &kduid, &kddist, eps, &uid);
+	
+	/* must have min neighbors within radius */
+	if (kdfound >= minpnts) {
+
+	    cat = idx[cid[uid]];
+
+	    /* find latest cluster */
+	    for (j = 0; j < kdfound; j++) {
+		if (idx[cid[kduid[j]]] > cat) {
+		    cat = idx[cid[kduid[j]]];
+		}
+	    }
+
+	    if (cat == 0) {
+		/* start new cluster */
+		nclusters++;
+		cat = nclusters;
+		if (nclusters > nlines)
+		    G_fatal_error(_("nlines: %d, nclusters: %d"), nlines, nclusters);
+		idx[nclusters] = nclusters;
+		cid[uid] = nclusters;
+	    }
+
+	    /* set or update cluster ids */
+	    if (cid[uid] != 0) {
+		/* relabel */
+		idx[cid[uid]] = cat;
+	    }
 	    else {
-		/* Result count */
-		nrows = db_get_num_rows(&cursor);
-		G_debug(1, "Result count: %d", nrows);
-		table = db_get_cursor_table(&cursor);
-		/* Let's output every columns name and value */
-		while (1) {
-		    if (db_fetch(&cursor, DB_NEXT, &more) != DB_OK) {
-			G_warning(_("Error while retrieving database record for cat %d"),
-				  cat);
-			break;
-		    }
-		    if (!more)
-			break;
-		    for (col = 0; col < ncols; col++) {
-			column = db_get_table_column(table, col);
-			db_convert_column_value_to_string(column, &valstr);
-			printf("%s: %s\t", db_get_column_name(column),
-			       db_get_string(&valstr));
-		    }
-		    printf("\n");
+		cid[uid] = cat;
+	    }
+
+	    for (j = 0; j < kdfound; j++) {
+		if (cid[kduid[j]] != 0) {
+		    /* relabel */
+		    idx[cid[kduid[j]]] = cat;
 		}
-		db_close_cursor(&cursor);
+		else {
+		   cid[kduid[j]] = cat;
+		}
 	    }
 	}
-	/* Only now we write data into new vector */
-	Vect_write_line(&Out, type, Points, Cats);
+	if (kdfound) {
+	    G_free(kddist);
+	    G_free(kduid);
+	}
     }
+    G_percent(npoints, npoints, 4);
 
-    /* Create database for new vector map */
-    Fin = Vect_default_field_info(&Out, 1, NULL, GV_1TABLE);
-    driver = db_start_driver_open_database(Fin->driver, Fin->database);
-    G_debug(1,
-	    "Field number:%d; Name:<%s>; Driver:<%s>; Database:<%s>; Table:<%s>; Key:<%s>;\n",
-	    Fin->number, Fin->name, Fin->driver, Fin->database, Fin->table,
-	    Fin->key);
+    if (nclusters == 0) {
+	G_message(_("No clusters found, adjust option %s"), dist_opt->key);
+	Vect_close(&In);
+	Vect_close(&Out);
+	Vect_delete(output->answer);
+	exit(EXIT_SUCCESS);
+    }
 
-    /* Let's copy attribute table data */
-    if (db_copy_table(Fi->driver, Fi->database, Fi->table,
-		      Fin->driver, Vect_subst_var(Fin->database, &Out),
-		      Fin->table) == DB_FAILED)
-	G_warning(_("Unable to copy attribute table to vector map <%s>"),
-		  new->answer);
-    else
-	Vect_map_add_dblink(&Out, Fin->number, Fin->name, Fin->table, Fi->key,
-			    Fin->database, Fin->driver);
+    /* generate a renumbering scheme */
+    G_message(_("Generating renumbering scheme..."));
+    G_debug(1, "%d initial clusters", nclusters);
+    /* allocate final clump ID */
+    renumber = (int *) G_malloc((nclusters + 1) * sizeof(int));
+    renumber[0] = 0;
+    cat = 1;
+    G_percent(0, nclusters, 1);
+    for (i = 1; i <= nclusters; i++) {
+	G_percent(i, nclusters, 4);
+	OLD = i;
+	NEW = idx[i];
+	if (OLD != NEW) {
+	    renumber[i] = 0;
+	    /* find valid clump ID */
+	    while (OLD != NEW) {
+		OLD = NEW;
+		NEW = idx[OLD];
+	    }
+	    idx[i] = NEW;
+	}
+	else
+	    /* set final clump id */
+	    renumber[i] = cat++;
+    }
 
+    nclusters = cat - 1;
+
+    /* write cluster ids */
+    G_message(_("Write out cluster ids ..."));
+    Vect_rewind(&In);
+    i = 0;
+    while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
+	G_percent(i++, nlines, 4);
+	if (type == GV_POINT) {
+	    cat = renumber[idx[cid[i]]];
+	    Vect_cat_set(Cats, clayer, cat);
+	    Vect_write_line(&Out, GV_POINT, Points, Cats);
+	}
+    }
+    G_percent(nlines, nlines, 4);
+
+    if (!flag_attr->answer)
+	Vect_copy_tables(&In, &Out, 0);
+
     /* Build topology for vector map and close them */
-    Vect_build(&Out);
     Vect_close(&In);
+    if (!flag_topo->answer)
+	Vect_build(&Out);
     Vect_close(&Out);
-    db_close_database_shutdown_driver(driver);
 
-    /* Don't forget to report to caller sucessfull end of data processing :) */
+    G_message(_("%d clusters found"), nclusters);
+
     exit(EXIT_SUCCESS);
 }

Copied: grass/trunk/vector/v.cluster/v.cluster.html (from rev 63571, grass/trunk/doc/vector/v.example/v.example.html)
===================================================================
--- grass/trunk/vector/v.cluster/v.cluster.html	                        (rev 0)
+++ grass/trunk/vector/v.cluster/v.cluster.html	2015-01-04 19:36:39 UTC (rev 63952)
@@ -0,0 +1,50 @@
+<h2>DESCRIPTION</h2>
+
+<em>v.cluster</em> partitions a point cloud into clusters or clumps. 
+A point can only be in a cluster if the maximum distance to its <i>min</i> 
+neighbors is smaller than distance. This algoritm is known as 
+<a href="http://en.wikipedia.org/wiki/DBSCAN">DBSCAN</a>.
+
+<p>
+If the minimum number of points is not given with the <i>min</i> option, 
+the minimum number of points to consitute a cluster is <i>number of dimensions + 1</i>, 
+i.e. 3 for 2D points and 4 for 3d points.
+
+
+<h2>EXAMPLE</h2>
+
+Analysis of random points for areas in the vector <i>urbanarea</i> in the 
+North Carolina sample dataset:
+
+<div class="code"><pre>
+# pick a subregion of he vector urbanarea
+g.region -p n=272950 s=188330 w=574720 e=703090 res=10
+
+# create clustered points
+v.random output=rand_clust npoints=1000000 restrict=urbanarea at PERMANENT
+
+# identify clusters
+v.cluster in=rand_clust out=rand_clusters
+
+# create colors for clusters
+v.db.addtable map=rand_clusters layer=2 columns="cat integer,grassrgb varchar(11)"
+v.colors map=rand_clusters layer=2 use=cat color=random rgb_column=grassrgb
+
+# display with your preferred method
+</pre></div>
+
+<h2>TODO</h2>
+
+Implement <a href="http://en.wikipedia.org/wiki/OPTICS_algorithm">OPTICS</a>
+
+<h2>SEE ALSO</h2>
+
+<em>
+<a href="r.clump.html">r.clump</a>
+</em>
+
+<h2>AUTHOR</h2>
+
+Markus Metz
+
+<p><i>Last changed: $Date$</i>

Deleted: grass/trunk/vector/v.cluster/v.example.html
===================================================================
--- grass/trunk/doc/vector/v.example/v.example.html	2014-12-16 12:11:56 UTC (rev 63571)
+++ grass/trunk/vector/v.cluster/v.example.html	2015-01-04 19:36:39 UTC (rev 63952)
@@ -1,23 +0,0 @@
-<h2>DESCRIPTION</h2>
-
-<em>v.example</em> is an example vector module that does something like
-labeling all vectors with value 1. A new map is written instead of updating
-the input map.
-
-<h2>EXAMPLE</h2>
-
-<div class="code"><pre>
-v.example input=map output=newmap
-</pre></div>
-
-<h2>SEE ALSO</h2>
-
-<em>
-<a href="http://grass.osgeo.org/programming7/">GRASS Programmer's Manual</a>
-</em>
-
-<h2>AUTHOR</h2>
-
-Radim Blazek, ITC-irst, Trento, Italy
-
-<p><i>Last changed: $Date$</i>



More information about the grass-commit mailing list