[GRASS-SVN] r30293 - in grass/trunk/vector: . v.class

svn_grass at osgeo.org svn_grass at osgeo.org
Fri Feb 22 13:07:52 EST 2008


Author: mlennert
Date: 2008-02-22 13:07:52 -0500 (Fri, 22 Feb 2008)
New Revision: 30293

Added:
   grass/trunk/vector/v.class/
   grass/trunk/vector/v.class/Makefile
   grass/trunk/vector/v.class/TODO
   grass/trunk/vector/v.class/description.html
   grass/trunk/vector/v.class/main.c
Log:
First checkin of v.class - module that calculates class breaks for vector attribute data


Added: grass/trunk/vector/v.class/Makefile
===================================================================
--- grass/trunk/vector/v.class/Makefile	                        (rev 0)
+++ grass/trunk/vector/v.class/Makefile	2008-02-22 18:07:52 UTC (rev 30293)
@@ -0,0 +1,14 @@
+
+MODULE_TOPDIR = ../..
+
+PGM=v.class
+
+LIBES     = $(ARRAYSTATSLIB) $(VECTLIB) $(DBMILIB) $(GISLIB)
+DEPENDENCIES = $(ARRAYSTATSDEP) $(VECTDEP) $(DBMIDEP) $(GISDEP)
+EXTRA_INC = $(VECT_INC)
+EXTRA_CFLAGS = $(VECT_CFLAGS)
+ 
+include $(MODULE_TOPDIR)/include/Make/Module.make
+
+default: cmd	
+

Added: grass/trunk/vector/v.class/TODO
===================================================================
--- grass/trunk/vector/v.class/TODO	                        (rev 0)
+++ grass/trunk/vector/v.class/TODO	2008-02-22 18:07:52 UTC (rev 30293)
@@ -0,0 +1 @@
+- rewrite v.class so that it only classifies data linked to a certain type (area, line, point)

Added: grass/trunk/vector/v.class/description.html
===================================================================
--- grass/trunk/vector/v.class/description.html	                        (rev 0)
+++ grass/trunk/vector/v.class/description.html	2008-02-22 18:07:52 UTC (rev 30293)
@@ -0,0 +1,50 @@
+<H2>DESCRIPTION</H2>
+
+<em>v.class</em> classifies vector attribute data into classes, for example for thematic mapping. Classification can be on a column or on an expression including several columns, all in the table linked to the vector map. The user indicates the number of classes desired and the algorithm to use for classification.
+
+Several algorithms are implemented for classification: equal interval, standard deviation, quantiles, equal probabilities, and a discontinuities algorithm developed by Jean-Pierre Grimmeau at the Free University of Brussels (ULB).
+
+It can be used to pipe class breaks into thematic mapping modules such as d.thematic.area (see example below).
+
+<H2>NOTES</H2>
+
+<P>The <em>equal interval</em> algorithm simply divides the range max-min by the number of breaks to determine the interval between class breaks.</P>
+
+<P>The <em>quantiles</em> algorithm creates classes which all contain approximately the same number of observations.</P>
+
+<P>The <em>standard deviations</em> algorithm creates class breaks which are a combination of the mean +/- the standard deviation. It calculates a scale factor (&lt;1) by which to multiply the standard deviation in order for all of the class breaks to fall into the range min-max of the data values.</P>
+
+<P>The <em>equiprobabilities</em> algorithm creates classes that would be equiprobable if the distribution were normal. If some of the class breaks fall outside the range min-max of the data values, the algorithm prints a warning and reduces the number of breaks, but the probabilities used are those of the number of breaks asked for.</P>
+
+<P>The <em>discont</em> algorithm systematically searches discontinuities in the slope of the cumulated frequencies curve, by approximating this curve through straight line segments whose vertices define the class breaks. The first approximation is a straight line which links the two end nodes of the curve. This line is then replaced by a two-segmented polyline whose central node is the point on the curve which is farthest from the preceding straight line. The point on the curve farthest from this new polyline is then chosen as a new node to break up one of the two preceding segments, and so forth. The problem of the difference in terms of units between the two axes is solved by rescaling both amplitudes to an interval between 0 and 1. In the original algorithm, the process is stopped when the difference between the slopes of the two new segments is no longer significant (alpha = 0.05). As the slope is the ratio between the frequency and the amplitude of the corresponding interval, i.e. its density, this effectively tests whether the frequencies of the two newly proposed classes are different from those obtained by simply distributing the sum of their frequencies amongst them in proportion to the class amplitudes. In the GRASS implementation, the algorithm continues, but a warning is printed.</P>
+
+<H2>EXAMPLE</H2>
+
+Classify column pop of map communes into 5 classes using quantiles:
+
+<div class="code"><pre>
+v.class map=communes column=pop algo=qua nbclasses=5
+</pre></div>
+
+This example uses population and area to calculate a population density and to determine the density classes:
+
+<div class="code"><pre>
+v.class map=communes column=pop/area algo=std nbclasses=5
+</pre></div>
+
+
+The following example uses the output of v.class and feeds it directly into d.thematic.area:
+<div class="code"><pre>
+d.thematic.area -l map=communes2 data=pop/area breaks=`v.class -g map=communes2 column=pop/area algo=std nbcla=5` colors=0:0:255,50:100:255,255:100:50,255:0:0,156:0:0
+</pre></div>
+
+<H2>SEE ALSO</H2>
+
+<EM><A HREF="v.univar.html">v.univar</A></EM>
+<EM><A HREF="d.thematic.area.html">d.thematic.area</A></EM>
+
+
+<H2>AUTHOR</H2>
+
+Moritz Lennert
+

Added: grass/trunk/vector/v.class/main.c
===================================================================
--- grass/trunk/vector/v.class/main.c	                        (rev 0)
+++ grass/trunk/vector/v.class/main.c	2008-02-22 18:07:52 UTC (rev 30293)
@@ -0,0 +1,226 @@
+
+/***************************************************************
+ *
+ * MODULE:       v.class
+ * 
+ * AUTHOR(S):    Moritz Lennert
+ *               
+ * PURPOSE:      Create data classes, mainly for thematic mapping
+ *               
+ * COPYRIGHT:    (C) 2004-2007 by the GRASS Development Team
+ *
+ *               This program is free software under the 
+ *               GNU General Public License (>=v2). 
+ *               Read the file COPYING that comes with GRASS
+ *               for details.
+ *
+ **************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include <grass/gis.h>
+#include <grass/Vect.h>
+#include <grass/dbmi.h>
+#include <grass/glocale.h>
+#include <grass/arraystats.h>
+
+/*
+ * v.class entry point: selects the values of a numeric column (or SQL
+ * expression) from the attribute table linked to the vector map, computes
+ * class breaks with the requested algorithm and prints them on stdout.
+ * With -g only the comma separated breaks are printed, otherwise a table
+ * of class limits and frequencies. Errors end in G_fatal_error().
+ */
+int main(int argc, char *argv[])
+{
+    struct GModule *module;
+    struct Option *map_opt, *field_opt, *col_opt, *where_opt;
+    struct Option *algo_opt, *nbclass_opt;
+    struct Flag *shell_flag;
+    char *mapset;
+    struct Map_info Map;
+    struct field_info *Fi;
+    dbDriver *Driver;
+    dbCatValArray Cvarr;
+    int ofield;
+    int nrec, ctype, nbclass, nbreaks, *frequencies;
+    int ret, i;
+    double finfo;
+    double *classbreaks, min, max, *data;
+    struct GASTATS stats;
+
+    module = G_define_module();
+    module->keywords = _("vector, statistics");
+    module->description =
+	_("Classifies attribute data, e.g. for thematic mapping");
+
+    map_opt = G_define_standard_option(G_OPT_V_MAP);
+    field_opt = G_define_standard_option(G_OPT_V_FIELD);
+
+    col_opt = G_define_option();
+    col_opt->key = "column";
+    col_opt->type = TYPE_STRING;
+    col_opt->required = YES;
+    col_opt->multiple = NO;
+    col_opt->description = _("Column name or expression");
+
+    where_opt = G_define_standard_option(G_OPT_WHERE);
+
+    algo_opt = G_define_option();
+    algo_opt->key = "algorithm";
+    algo_opt->type = TYPE_STRING;
+    algo_opt->required = YES;
+    algo_opt->multiple = NO;
+    algo_opt->options = "int,std,qua,equ,dis";
+    algo_opt->description = _("Algorithm to use for classification");
+    algo_opt->descriptions = _("int;simple intervals;"
+			       "std;standard deviations;"
+			       "qua;quantiles;"
+			       "equ;equiprobable (normal distribution);"
+			       "dis;discontinuities");
+
+    nbclass_opt = G_define_option();
+    nbclass_opt->key = "nbclasses";
+    nbclass_opt->type = TYPE_INTEGER;
+    nbclass_opt->required = YES;
+    nbclass_opt->multiple = NO;
+    nbclass_opt->description = _("Number of classes to define");
+
+    shell_flag = G_define_flag();
+    shell_flag->key = 'g';
+    shell_flag->description =
+	_("Print only class breaks (without min and max)");
+
+    G_gisinit(argv[0]);
+    if (G_parser(argc, argv))
+	exit(EXIT_FAILURE);
+
+    ofield = atoi(field_opt->answer);
+
+    /* classbreaks[nbreaks - 1] is read below, so at least one break is needed */
+    nbclass = atoi(nbclass_opt->answer);
+    if (nbclass < 2)
+	G_fatal_error(_("Number of classes must be at least 2"));
+    nbreaks = nbclass - 1;	/* we need one less classbreaks (min and max excluded) than classes */
+
+    /* open input vector */
+    if ((mapset = G_find_vector2(map_opt->answer, "")) == NULL)
+	G_fatal_error(_("Vector map <%s> not found"), map_opt->answer);
+
+    Vect_set_open_level(2);
+    if (Vect_open_old(&Map, map_opt->answer, mapset) < 0)
+	G_fatal_error(_("Unable to open vector map <%s>"), map_opt->answer);
+
+    /* Read attributes */
+    db_CatValArray_init(&Cvarr);
+    Fi = Vect_get_field(&Map, ofield);
+    if (Fi == NULL)
+	G_fatal_error(_("Unable to get layer info for vector map"));
+
+    Driver = db_start_driver_open_database(Fi->driver, Fi->database);
+    if (Driver == NULL)
+	G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
+		      Fi->database, Fi->driver);
+
+    /* Note do not check if the column exists in the table because it may be an expression */
+    nrec = db_select_CatValArray(Driver, Fi->table, Fi->key, col_opt->answer,
+				 where_opt->answer, &Cvarr);
+    G_debug(2, "nrec = %d", nrec);
+
+    /* report a failed select as such, not as an unsupported column type */
+    if (nrec < 0)
+	G_fatal_error(_("Unable to select data from table"));
+
+    ctype = Cvarr.ctype;
+    if (ctype != DB_C_TYPE_INT && ctype != DB_C_TYPE_DOUBLE)
+	G_fatal_error(_("Column type not supported"));
+
+    db_close_database_shutdown_driver(Driver);
+
+    /* data[0] and data[nrec - 1] are read below, so an empty result is an error */
+    if (nrec == 0)
+	G_fatal_error(_("No data selected"));
+
+    if (db_CatValArray_sort_by_value(&Cvarr) == DB_FAILED)
+	G_fatal_error(_("Could not sort array of values.."));
+
+    /* copy the values, in their sorted order, into a plain array of doubles */
+    data = (double *)G_malloc((nrec) * sizeof(double));
+    for (i = 0; i < nrec; i++)
+	data[i] = (ctype == DB_C_TYPE_INT) ? (double)Cvarr.value[i].val.i
+					   : Cvarr.value[i].val.d;
+
+    classbreaks = (double *)G_malloc((nbreaks) * sizeof(double));
+    for (i = 0; i < nbreaks; i++)
+	classbreaks[i] = 0;
+
+    if (G_strcasecmp(algo_opt->answer, "int") == 0)
+	finfo = class_interval(data, nrec, nbreaks, classbreaks);
+    else if (G_strcasecmp(algo_opt->answer, "std") == 0)
+	finfo = class_stdev(data, nrec, nbreaks, classbreaks);
+    else if (G_strcasecmp(algo_opt->answer, "qua") == 0)
+	finfo = class_quant(data, nrec, nbreaks, classbreaks);
+    else if (G_strcasecmp(algo_opt->answer, "equ") == 0)
+	finfo = class_equiprob(data, nrec, &nbreaks, classbreaks);
+    else if (G_strcasecmp(algo_opt->answer, "dis") == 0)
+	finfo = class_discont(data, nrec, nbreaks, classbreaks);
+    else
+	G_fatal_error(_("%s: Unknown algorithm"), algo_opt->answer);
+
+    if (finfo == 0)
+	G_fatal_error(_("%s: Error in classification algorithm"),
+		      algo_opt->answer);
+
+    /* class_equiprob() gets nbreaks by address and may lower it */
+    if (nbreaks < 1)
+	G_fatal_error(_("%s: No class breaks could be determined"),
+		      algo_opt->answer);
+
+    /*output to be piped to other modules ? */
+    if (shell_flag->answer) {
+	for (i = 0; i < nbreaks - 1; i++)
+	    fprintf(stdout, "%f,", classbreaks[i]);
+	fprintf(stdout, "%f", classbreaks[nbreaks - 1]);
+	fprintf(stdout, "\n");
+    }
+    else {
+	frequencies = (int *)G_malloc((nbreaks + 1) * sizeof(int));
+	for (i = 0; i < nbreaks + 1; i++)
+	    frequencies[i] = 0;
+
+	ret = class_frequencies(data, nrec, nbreaks, classbreaks, frequencies);
+	basic_stats(data, nrec, &stats);
+
+	/* data holds the values in sorted order, so its ends are the extremes */
+	min = data[0];
+	max = data[nrec - 1];
+
+	/* report the classes actually produced, not the number requested */
+	fprintf(stdout, _("\nClassification of %s into %i classes\n"),
+		col_opt->answer, nbreaks + 1);
+	fprintf(stdout, _("Using algorithm: *** %s ***\n"), algo_opt->answer);
+	fprintf(stdout, _("Mean: %f\tStandard deviation = %f\n"), stats.mean,
+		stats.stdev);
+
+	if (G_strcasecmp(algo_opt->answer, "dis") == 0)
+	    fprintf(stdout, _("Last chi2 = %f\n"), finfo);
+	if (G_strcasecmp(algo_opt->answer, "std") == 0)
+	    fprintf(stdout, _("Stdev multiplied by %.4f to define step\n"), finfo);
+	fprintf(stdout, "\n");
+	fprintf(stdout, _("%15s%15s%15s\n\n"), "From (excl.)", "To (incl.)",
+		"Frequency");
+	fprintf(stdout, "%15.5f%15.5f%15i\n", min, classbreaks[0],
+		frequencies[0]);
+
+	for (i = 1; i < nbreaks; i++)
+	    fprintf(stdout, "%15.5f%15.5f%15i\n",
+		    classbreaks[i - 1], classbreaks[i], frequencies[i]);
+	fprintf(stdout, "%15.5f%15.5f%15i\n",
+		classbreaks[nbreaks - 1], max, frequencies[nbreaks]);
+
+	fprintf(stdout, _("\nNote: Minimum of first class is including\n\n"));
+    }
+
+    fflush(stdout);
+    Vect_close(&Map);
+    exit(EXIT_SUCCESS);
+}



More information about the grass-commit mailing list