[GRASS-SVN] r63952 - in grass/trunk/vector: . v.cluster
svn_grass at osgeo.org
svn_grass at osgeo.org
Sun Jan 4 11:36:40 PST 2015
Author: mmetz
Date: 2015-01-04 11:36:39 -0800 (Sun, 04 Jan 2015)
New Revision: 63952
Added:
grass/trunk/vector/v.cluster/
grass/trunk/vector/v.cluster/v.cluster.html
Removed:
grass/trunk/vector/v.cluster/v.example.html
Modified:
grass/trunk/vector/Makefile
grass/trunk/vector/v.cluster/Makefile
grass/trunk/vector/v.cluster/main.c
Log:
add v.cluster: identify clusters in a point cloud
Modified: grass/trunk/vector/Makefile
===================================================================
--- grass/trunk/vector/Makefile 2015-01-04 19:32:25 UTC (rev 63951)
+++ grass/trunk/vector/Makefile 2015-01-04 19:36:39 UTC (rev 63952)
@@ -10,6 +10,7 @@
v.category \
v.class \
v.clean \
+ v.cluster \
v.colors \
v.colors.out \
v.db.connect \
Modified: grass/trunk/vector/v.cluster/Makefile
===================================================================
--- grass/trunk/doc/vector/v.example/Makefile 2014-12-16 12:11:56 UTC (rev 63571)
+++ grass/trunk/vector/v.cluster/Makefile 2015-01-04 19:36:39 UTC (rev 63952)
@@ -1,11 +1,9 @@
-# fix this relative to include/
-# or use absolute path to the GRASS source code
-MODULE_TOPDIR = ../../..
+MODULE_TOPDIR = ../..
-PGM = v.example
+PGM = v.cluster
-LIBES = $(VECTORLIB) $(DBMILIB) $(GISLIB)
-DEPENDENCIES = $(VECTORDEP) $(DBMIDEP) $(GISDEP)
+LIBES = $(VECTORLIB) $(BTREE2LIB) $(GISLIB)
+DEPENDENCIES = $(VECTORDEP) $(BTREE2DEP) $(GISDEP)
EXTRA_INC = $(VECT_INC)
EXTRA_CFLAGS = $(VECT_CFLAGS)
Modified: grass/trunk/vector/v.cluster/main.c
===================================================================
--- grass/trunk/doc/vector/v.example/main.c 2014-12-16 12:11:56 UTC (rev 63571)
+++ grass/trunk/vector/v.cluster/main.c 2015-01-04 19:36:39 UTC (rev 63952)
@@ -1,14 +1,13 @@
/****************************************************************
*
- * MODULE: v.example
+ * MODULE: v.cluster
*
- * AUTHOR(S): GRASS Development Team, Radim Blazek, Maris Nartiss
+ * AUTHOR(S): Markus Metz
*
- * PURPOSE: copies vector data from source map to destination map
- * prints out all point coordinates and atributes
+ * PURPOSE: Identifies clusters in a point cloud
*
- * COPYRIGHT: (C) 2002-2009 by the GRASS Development Team
+ * COPYRIGHT: (C) 2015 by the GRASS Development Team
*
* This program is free software under the
* GNU General Public License (>=v2).
@@ -21,25 +20,30 @@
#include <stdlib.h>
#include <grass/gis.h>
#include <grass/vector.h>
-#include <grass/dbmi.h>
#include <grass/glocale.h>
+#include <grass/kdtree.h>
int main(int argc, char *argv[])
{
struct Map_info In, Out;
static struct line_pnts *Points;
struct line_cats *Cats;
- int i, type, cat, ncols, nrows, col, more, open3d;
- char *mapset, sql[200];
- struct GModule *module; /* GRASS module for parsing arguments */
- struct Option *old, *new;
- dbDriver *driver;
- dbHandle handle;
- dbCursor cursor;
- dbTable *table;
- dbColumn *column;
- dbString table_name, dbsql, valstr;
- struct field_info *Fi, *Fin;
+ int i, j, type, cat, is3d;
+ struct GModule *module;
+ struct Option *input, *output, *dist_opt, *min_opt, *lyr_opt;
+ struct Flag *flag_2d, *flag_topo, *flag_attr;
+ int clayer;
+ int npoints, nlines;
+ int *cid, *idx, *renumber, OLD, NEW;
+ int nclusters;
+ struct kdtree *kdt;
+ struct kdtrav trav;
+ double c[3];
+ int uid;
+ double eps;
+ int ndims, minpnts;
+ double *kddist;
+ int kdfound, *kduid;
/* initialize GIS environment */
/* reads grass env, stores program name to G_program_name() */
@@ -48,179 +52,313 @@
/* initialize module */
module = G_define_module();
G_add_keyword(_("vector"));
- G_add_keyword(_("keyword2"));
- G_add_keyword(_("keyword3"));
- module->description = _("My first vector module");
+ G_add_keyword(_("point cloud"));
+ G_add_keyword(_("cluster"));
+ G_add_keyword(_("clump"));
+ module->description = _("Cluster identification");
/* Define the different options as defined in gis.h */
- old = G_define_standard_option(G_OPT_V_INPUT);
+ input = G_define_standard_option(G_OPT_V_INPUT);
- new = G_define_standard_option(G_OPT_V_OUTPUT);
+ output = G_define_standard_option(G_OPT_V_OUTPUT);
+ lyr_opt = G_define_standard_option(G_OPT_V_FIELD);
+ lyr_opt->label = _("Layer number or name for cluster ids");
+ lyr_opt->answer = "2";
+
+ dist_opt = G_define_option();
+ dist_opt->type = TYPE_DOUBLE;
+ dist_opt->key = "distance";
+ dist_opt->required = NO;
+ dist_opt->label = _("Maximum distance to neighbors");
+
+ min_opt = G_define_option();
+ min_opt->type = TYPE_INTEGER;
+ min_opt->key = "min";
+ min_opt->required = NO;
+ min_opt->label = _("Minimum number of points to create a cluster");
+
+ flag_2d = G_define_flag();
+ flag_2d->key = '2';
+ flag_2d->label = _("Force 2D clustering");
+
+ flag_topo = G_define_standard_flag(G_FLG_V_TOPO);
+
+ flag_attr = G_define_standard_flag(G_FLG_V_TABLE);
+
+
/* options and flags parser */
if (G_parser(argc, argv))
exit(EXIT_FAILURE);
- /* Create and initialize struct's where to store points/lines and categories */
Points = Vect_new_line_struct();
Cats = Vect_new_cats_struct();
- /* Check 1) output is legal vector name; 2) if can find input map;
- 3) if input was found in current mapset, check if input != output.
- lib/vector/Vlib/legal_vname.c
- */
- Vect_check_input_output_name(old->answer, new->answer, G_FATAL_EXIT);
+ Vect_check_input_output_name(input->answer, output->answer, G_FATAL_EXIT);
- if ((mapset = (char *)G_find_vector2(old->answer, "")) == NULL)
- G_fatal_error(_("Vector map <%s> not found"), old->answer);
-
- /* Predetermine level at which a map will be opened for reading
- lib/vector/Vlib/open.c
- */
- if (Vect_set_open_level(2))
+ if (Vect_set_open_level(1))
G_fatal_error(_("Unable to set predetermined vector open level"));
- /* Open existing vector for reading
- lib/vector/Vlib/open.c
- */
- if (1 > Vect_open_old(&In, old->answer, mapset))
- G_fatal_error(_("Unable to open vector map <%s>"), old->answer);
+ if (1 > Vect_open_old(&In, input->answer, NULL))
+ G_fatal_error(_("Unable to open vector map <%s>"), input->answer);
/* Check if old vector is 3D. We should preserve 3D data. */
- if (Vect_is_3d(&In))
- open3d = WITH_Z;
- else
- open3d = WITHOUT_Z;
-
- /* Open new vector for reading/writing */
- if (0 > Vect_open_new(&Out, new->answer, open3d)) {
- Vect_close(&In);
- G_fatal_error(_("Unable to create vector map <%s>"), new->answer);
+ is3d = WITHOUT_Z;
+ ndims = 2;
+ if (Vect_is_3d(&In)) {
+ is3d = WITH_Z;
+ ndims = 3;
}
- /* Let's get vector layers db connections information */
- Fi = Vect_get_field(&In, 1);
- if (!Fi) {
- Vect_close(&In);
- G_fatal_error(_("Database connection not defined for layer %d"), 1);
+ if (flag_2d->answer)
+ ndims = 2;
+
+ minpnts = ndims;
+
+ if (min_opt->answer) {
+ minpnts = atoi(min_opt->answer);
+ if (minpnts < 2) {
+ G_warning(_("Minimum number of points must be at least 2"));
+ minpnts = 2;
+ }
+ minpnts--;
}
- /* Output information useful for debuging
- include/vect/dig_structs.h
- */
- G_debug(1,
- "Field number:%d; Name:<%s>; Driver:<%s>; Database:<%s>; Table:<%s>; Key:<%s>;\n",
- Fi->number, Fi->name, Fi->driver, Fi->database, Fi->table,
- Fi->key);
+ clayer = atoi(lyr_opt->answer);
- /* Prepeare strings for use in db_* calls */
- db_init_string(&dbsql);
- db_init_string(&valstr);
- db_init_string(&table_name);
- db_init_handle(&handle);
+ /* count points */
+ G_message(_("Counting input points ..."));
+ npoints = nlines = 0;
+ while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
+ nlines++;
+ if (type == GV_POINT)
+ npoints++;
+ }
- /* Prepare database for use */
- driver = db_start_driver(Fi->driver);
- if (driver == NULL) {
+ if (npoints == 0) {
+ G_warning(_("No points in input, nothing to do"));
Vect_close(&In);
- G_fatal_error(_("Unable to start driver <%s>"), Fi->driver);
+ exit(EXIT_SUCCESS);
}
- db_set_handle(&handle, Fi->database, NULL);
- if (db_open_database(driver, &handle) != DB_OK) {
+
+ /* Open new vector for reading/writing */
+ if (0 > Vect_open_new(&Out, output->answer, is3d)) {
Vect_close(&In);
- G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
- Fi->database, driver);
+ G_fatal_error(_("Unable to create vector map <%s>"), output->answer);
}
- db_set_string(&table_name, Fi->table);
- if (db_describe_table(driver, &table_name, &table) != DB_OK) {
- Vect_close(&In);
- G_fatal_error(_("Unable to describe table <%s>"), Fi->table);
- }
- ncols = db_get_table_number_of_columns(table);
/* Copy header and history data from old to new map */
Vect_copy_head_data(&In, &Out);
Vect_hist_copy(&In, &Out);
Vect_hist_command(&Out);
- i = 1;
- /* Let's do something with every vector feature in input map... */
- /* Read in single line and get it's type */
+ /* create k-d tree */
+ G_message(_("Creating search index ..."));
+ kdt = kdtree_create(ndims, NULL);
+ cid = G_malloc((nlines + 1) * sizeof(int));
+ idx = G_malloc((nlines + 1) * sizeof(int));
+ Vect_rewind(&In);
+ i = 0;
+ cid[0] = 0;
while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
- /* If Points contain line... */
- if (type == GV_LINE || type == GV_POINT || type == GV_CENTROID) {
- if (Vect_cat_get(Cats, 1, &cat) == 0) {
- Vect_cat_set(Cats, 1, i);
- i++;
+ G_percent(i++, nlines, 4);
+ cid[i] = 0;
+ if (type == GV_POINT) {
+
+ c[0] = Points->x[0];
+ c[1] = Points->y[0];
+ c[2] = Points->z[0];
+
+ kdtree_insert(kdt, c, i, 0);
+ }
+ }
+ G_percent(nlines, nlines, 4);
+
+ kdtree_optimize(kdt, 2);
+
+ /* get epsilon */
+ if (dist_opt->answer) {
+ eps = atof(dist_opt->answer);
+ if (eps <= 0)
+ G_fatal_error(_("Option %s must be a positive number"), dist_opt->key);
+ }
+ else {
+ int n;
+ double dist, mean, min, max, sum, sumsq, sd;
+ double *kd;
+ int *ki;
+
+ /* estimate epsilon */
+ G_message(_("Estimating maximum distance ..."));
+ kdtree_init_trav(&trav, kdt);
+ c[2] = 0.0;
+ n = 0;
+ sum = sumsq = 0;
+ min = 1.0 / 0.0;
+ max = 0;
+ kd = G_malloc(minpnts * sizeof(double));
+ ki = G_malloc(minpnts * sizeof(int));
+ i = 0;
+ while (kdtree_traverse(&trav, c, &uid)) {
+ G_percent(i++, npoints, 4);
+
+ kdfound = kdtree_knn(kdt, c, ki, kd, minpnts, &uid);
+ if (kdfound) {
+ dist = sqrt(kd[kdfound - 1]);
+ sum += dist;
+ sumsq += dist * dist;
+ n++;
+ if (min > dist)
+ min = dist;
+ if (max < dist)
+ max = dist;
}
}
- if (type == GV_POINT) {
- /* Print out point coordinates */
- printf("No:%d\tX:%f\tY:%f\tZ:%f\tCAT:%d\n", i, *Points->x,
- *Points->y, *Points->z, cat);
+ G_percent(npoints, npoints, 4);
- /* Prepeare SQL query to get point attribute data */
- sprintf(sql, "select * from %s where %s=%d", Fi->table, Fi->key,
- cat);
- G_debug(1, "SQL: \"%s\"", sql);
- db_set_string(&dbsql, sql);
- /* Now execute query */
- if (db_open_select_cursor(driver, &dbsql, &cursor, DB_SEQUENTIAL)
- != DB_OK)
- G_warning(_("Unable to get attribute data for cat %d"), cat);
+ G_free(kd);
+ G_free(ki);
+
+ if (!n)
+ G_fatal_error(_("No neighbors found"));
+
+ mean = sum / n;
+ sd = sqrt(sumsq / n - mean * mean);
+ eps = mean + 1.644854 * sd; /* 90% CI */
+ eps = mean + 2.575829 * sd; /* 99% CI */
+
+ if (eps > max)
+ eps = max;
+
+ G_message(_("Distance to the %d nearest neighbor:"), minpnts);
+ G_message(_("Min: %g, max: %g"), min, max);
+ G_message(_("Mean: %g"), mean);
+ G_message(_("Standard deviation: %g"), sd);
+
+ G_message(_("Estimated maximum distance: %g"), eps);
+ }
+
+ /* create clusters */
+ G_message(_("Building clusters ..."));
+ nclusters = 0;
+ kdtree_init_trav(&trav, kdt);
+ c[2] = 0.0;
+ idx[0] = 0;
+ i = 0;
+ while (kdtree_traverse(&trav, c, &uid)) {
+ G_percent(i++, npoints, 4);
+
+ /* radius search */
+ kdfound = kdtree_dnn(kdt, c, &kduid, &kddist, eps, &uid);
+
+ /* must have min neighbors within radius */
+ if (kdfound >= minpnts) {
+
+ cat = idx[cid[uid]];
+
+ /* find latest cluster */
+ for (j = 0; j < kdfound; j++) {
+ if (idx[cid[kduid[j]]] > cat) {
+ cat = idx[cid[kduid[j]]];
+ }
+ }
+
+ if (cat == 0) {
+ /* start new cluster */
+ nclusters++;
+ cat = nclusters;
+ if (nclusters > nlines)
+ G_fatal_error(_("nlines: %d, nclusters: %d"), nlines, nclusters);
+ idx[nclusters] = nclusters;
+ cid[uid] = nclusters;
+ }
+
+ /* set or update cluster ids */
+ if (cid[uid] != 0) {
+ /* relabel */
+ idx[cid[uid]] = cat;
+ }
else {
- /* Result count */
- nrows = db_get_num_rows(&cursor);
- G_debug(1, "Result count: %d", nrows);
- table = db_get_cursor_table(&cursor);
- /* Let's output every columns name and value */
- while (1) {
- if (db_fetch(&cursor, DB_NEXT, &more) != DB_OK) {
- G_warning(_("Error while retrieving database record for cat %d"),
- cat);
- break;
- }
- if (!more)
- break;
- for (col = 0; col < ncols; col++) {
- column = db_get_table_column(table, col);
- db_convert_column_value_to_string(column, &valstr);
- printf("%s: %s\t", db_get_column_name(column),
- db_get_string(&valstr));
- }
- printf("\n");
+ cid[uid] = cat;
+ }
+
+ for (j = 0; j < kdfound; j++) {
+ if (cid[kduid[j]] != 0) {
+ /* relabel */
+ idx[cid[kduid[j]]] = cat;
}
- db_close_cursor(&cursor);
+ else {
+ cid[kduid[j]] = cat;
+ }
}
}
- /* Only now we write data into new vector */
- Vect_write_line(&Out, type, Points, Cats);
+ if (kdfound) {
+ G_free(kddist);
+ G_free(kduid);
+ }
}
+ G_percent(npoints, npoints, 4);
- /* Create database for new vector map */
- Fin = Vect_default_field_info(&Out, 1, NULL, GV_1TABLE);
- driver = db_start_driver_open_database(Fin->driver, Fin->database);
- G_debug(1,
- "Field number:%d; Name:<%s>; Driver:<%s>; Database:<%s>; Table:<%s>; Key:<%s>;\n",
- Fin->number, Fin->name, Fin->driver, Fin->database, Fin->table,
- Fin->key);
+ if (nclusters == 0) {
+ G_message(_("No clusters found, adjust option %s"), dist_opt->key);
+ Vect_close(&In);
+ Vect_close(&Out);
+ Vect_delete(output->answer);
+ exit(EXIT_SUCCESS);
+ }
- /* Let's copy attribute table data */
- if (db_copy_table(Fi->driver, Fi->database, Fi->table,
- Fin->driver, Vect_subst_var(Fin->database, &Out),
- Fin->table) == DB_FAILED)
- G_warning(_("Unable to copy attribute table to vector map <%s>"),
- new->answer);
- else
- Vect_map_add_dblink(&Out, Fin->number, Fin->name, Fin->table, Fi->key,
- Fin->database, Fin->driver);
+ /* generate a renumbering scheme */
+ G_message(_("Generating renumbering scheme..."));
+ G_debug(1, "%d initial clusters", nclusters);
+ /* allocate final clump ID */
+ renumber = (int *) G_malloc((nclusters + 1) * sizeof(int));
+ renumber[0] = 0;
+ cat = 1;
+ G_percent(0, nclusters, 1);
+ for (i = 1; i <= nclusters; i++) {
+ G_percent(i, nclusters, 4);
+ OLD = i;
+ NEW = idx[i];
+ if (OLD != NEW) {
+ renumber[i] = 0;
+ /* find valid clump ID */
+ while (OLD != NEW) {
+ OLD = NEW;
+ NEW = idx[OLD];
+ }
+ idx[i] = NEW;
+ }
+ else
+ /* set final clump id */
+ renumber[i] = cat++;
+ }
+ nclusters = cat - 1;
+
+ /* write cluster ids */
+ G_message(_("Write out cluster ids ..."));
+ Vect_rewind(&In);
+ i = 0;
+ while ((type = Vect_read_next_line(&In, Points, Cats)) > 0) {
+ G_percent(i++, nlines, 4);
+ if (type == GV_POINT) {
+ cat = renumber[idx[cid[i]]];
+ Vect_cat_set(Cats, clayer, cat);
+ Vect_write_line(&Out, GV_POINT, Points, Cats);
+ }
+ }
+ G_percent(nlines, nlines, 4);
+
+ if (!flag_attr->answer)
+ Vect_copy_tables(&In, &Out, 0);
+
/* Build topology for vector map and close them */
- Vect_build(&Out);
Vect_close(&In);
+ if (!flag_topo->answer)
+ Vect_build(&Out);
Vect_close(&Out);
- db_close_database_shutdown_driver(driver);
- /* Don't forget to report to caller sucessfull end of data processing :) */
+ G_message(_("%d clusters found"), nclusters);
+
exit(EXIT_SUCCESS);
}
Copied: grass/trunk/vector/v.cluster/v.cluster.html (from rev 63571, grass/trunk/doc/vector/v.example/v.example.html)
===================================================================
--- grass/trunk/vector/v.cluster/v.cluster.html (rev 0)
+++ grass/trunk/vector/v.cluster/v.cluster.html 2015-01-04 19:36:39 UTC (rev 63952)
@@ -0,0 +1,50 @@
+<h2>DESCRIPTION</h2>
+
+<em>v.cluster</em> partitions a point cloud into clusters or clumps.
+A point can only be in a cluster if the maximum distance to its <i>min</i>
+neighbors is smaller than distance. This algoritm is known as
+<a href="http://en.wikipedia.org/wiki/DBSCAN">DBSCAN</a>.
+
+<p>
+If the minimum number of points is not given with the <i>min</i> option,
+the minimum number of points to consitute a cluster is <i>number of dimensions + 1</i>,
+i.e. 3 for 2D points and 4 for 3d points.
+
+
+<h2>EXAMPLE</h2>
+
+Analysis of random points for areas in the vector <i>urbanarea</i> in the
+North Carolina sample dataset:
+
+<div class="code"><pre>
+# pick a subregion of he vector urbanarea
+g.region -p n=272950 s=188330 w=574720 e=703090 res=10
+
+# create clustered points
+v.random output=rand_clust npoints=1000000 restrict=urbanarea at PERMANENT
+
+# identify clusters
+v.cluster in=rand_clust out=rand_clusters
+
+# create colors for clusters
+v.db.addtable map=rand_clusters layer=2 columns="cat integer,grassrgb varchar(11)"
+v.colors map=rand_clusters layer=2 use=cat color=random rgb_column=grassrgb
+
+# display with your preferred method
+</pre></div>
+
+<h2>TODO</h2>
+
+Implement <a href="http://en.wikipedia.org/wiki/OPTICS_algorithm">OPTICS</a>
+
+<h2>SEE ALSO</h2>
+
+<em>
+<a href="r.clump.html">r.clump</a>
+</em>
+
+<h2>AUTHOR</h2>
+
+Markus Metz
+
+<p><i>Last changed: $Date$</i>
Deleted: grass/trunk/vector/v.cluster/v.example.html
===================================================================
--- grass/trunk/doc/vector/v.example/v.example.html 2014-12-16 12:11:56 UTC (rev 63571)
+++ grass/trunk/vector/v.cluster/v.example.html 2015-01-04 19:36:39 UTC (rev 63952)
@@ -1,23 +0,0 @@
-<h2>DESCRIPTION</h2>
-
-<em>v.example</em> is an example vector module that does something like
-labeling all vectors with value 1. A new map is written instead of updating
-the input map.
-
-<h2>EXAMPLE</h2>
-
-<div class="code"><pre>
-v.example input=map output=newmap
-</pre></div>
-
-<h2>SEE ALSO</h2>
-
-<em>
-<a href="http://grass.osgeo.org/programming7/">GRASS Programmer's Manual</a>
-</em>
-
-<h2>AUTHOR</h2>
-
-Radim Blazek, ITC-irst, Trento, Italy
-
-<p><i>Last changed: $Date$</i>
More information about the grass-commit
mailing list