[GRASS-SVN] r32738 - in grass/branches/develbranch_6: display/d.thematic.area include lib/arraystats vector/v.class

svn_grass at osgeo.org svn_grass at osgeo.org
Wed Aug 13 05:30:59 EDT 2008


Author: mlennert
Date: 2008-08-13 05:30:59 -0400 (Wed, 13 Aug 2008)
New Revision: 32738

Modified:
   grass/branches/develbranch_6/display/d.thematic.area/Makefile
   grass/branches/develbranch_6/display/d.thematic.area/area.c
   grass/branches/develbranch_6/display/d.thematic.area/local_proto.h
   grass/branches/develbranch_6/display/d.thematic.area/main.c
   grass/branches/develbranch_6/include/arraystats.h
   grass/branches/develbranch_6/lib/arraystats/basic.c
   grass/branches/develbranch_6/lib/arraystats/class.c
   grass/branches/develbranch_6/vector/v.class/main.c
Log:
backport from trunk


Modified: grass/branches/develbranch_6/display/d.thematic.area/Makefile
===================================================================
--- grass/branches/develbranch_6/display/d.thematic.area/Makefile	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/display/d.thematic.area/Makefile	2008-08-13 09:30:59 UTC (rev 32738)
@@ -3,8 +3,8 @@
 
 PGM = d.thematic.area
 
-LIBES        = $(DISPLAYLIB) $(RASTERLIB) $(VECTLIB) $(DBMILIB) $(GISLIB) $(SYMBLIB)
-DEPENDENCIES = $(DISPLAYDEP) $(RASTERDEP) $(VECTDEP) $(DBMIDEP) $(GISDEP) $(SYMBDEP)
+LIBES        = $(ARRAYSTATSLIB) $(DISPLAYLIB) $(RASTERLIB) $(VECTLIB) $(DBMILIB) $(GISLIB) $(SYMBLIB)
+DEPENDENCIES = $(ARRAYSTATSDEP) $(DISPLAYDEP) $(RASTERDEP) $(VECTDEP) $(DBMIDEP) $(GISDEP) $(SYMBDEP)
 EXTRA_INC = $(VECT_INC)
 EXTRA_CFLAGS = $(VECT_CFLAGS)
 

Modified: grass/branches/develbranch_6/display/d.thematic.area/area.c
===================================================================
--- grass/branches/develbranch_6/display/d.thematic.area/area.c	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/display/d.thematic.area/area.c	2008-08-13 09:30:59 UTC (rev 32738)
@@ -16,8 +16,7 @@
 int dareatheme(struct Map_info *Map, struct cat_list *Clist,
 	       dbCatValArray * cvarr, double *breaks, int nbreaks,
 	       const struct color_rgb *colors, const struct color_rgb *bcolor,
-	       int chcat, struct Cell_head *window, int default_width,
-	       int *frequencies, int nodraw)
+	       int chcat, struct Cell_head *window, int default_width)
 {
 
     int num, area, isle, n_isles, n_points;
@@ -34,7 +33,6 @@
     IPoints = Vect_new_line_struct();
     Cats = Vect_new_cats_struct();
 
-
     num = Vect_get_num_areas(Map);
     G_debug(2, "n_areas = %d", num);
 
@@ -148,33 +146,32 @@
 	    }
 	}
 
-	/* find out into which class breakval falls */
-	i = 0;
-	while (breakval > breaks[i] && i < nbreaks)
-	    i++;
-	frequencies[i]++;
+        /* find out into which class breakval falls */
+        i = 0;
+        while (breakval > breaks[i] && i < nbreaks)
+        i++;
 
-	if (!nodraw) {
-	    /* plot polygon in class color */
-	    R_RGB_color(colors[i].r, colors[i].g, colors[i].b);
-	    plot_polygon(Points->x, Points->y, Points->n_points);
 
-	    /* XXX rewrite boundary */
-	    if (bcolor) {
-		int i;
 
-		Vect_get_area_points(Map, area, Points);
-		R_RGB_color(bcolor->r, bcolor->g, bcolor->b);
+	/* plot polygon in class color */
+	R_RGB_color(colors[i].r, colors[i].g, colors[i].b);
+	plot_polygon(Points->x, Points->y, Points->n_points);
+
+	/* XXX rewrite boundary */
+	if (bcolor) {
+	    int i;
+
+	    Vect_get_area_points(Map, area, Points);
+	    R_RGB_color(bcolor->r, bcolor->g, bcolor->b);
+	    /*use different user defined render methods */
+	    plot_polyline(Points->x, Points->y, Points->n_points);
+	    for (i = 0; i < n_isles; i++) {
+		isle = Vect_get_area_isle(Map, area, i);
+		Vect_get_isle_points(Map, isle, Points);
 		/*use different user defined render methods */
 		plot_polyline(Points->x, Points->y, Points->n_points);
-		for (i = 0; i < n_isles; i++) {
-		    isle = Vect_get_area_isle(Map, area, i);
-		    Vect_get_isle_points(Map, isle, Points);
-		    /*use different user defined render methods */
-		    plot_polyline(Points->x, Points->y, Points->n_points);
-		}
 	    }
-	}			/* end if !nodraw */
+	}
     }				/* end for loop over areas */
 
     Vect_destroy_line_struct(Points);

Modified: grass/branches/develbranch_6/display/d.thematic.area/local_proto.h
===================================================================
--- grass/branches/develbranch_6/display/d.thematic.area/local_proto.h	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/display/d.thematic.area/local_proto.h	2008-08-13 09:30:59 UTC (rev 32738)
@@ -9,8 +9,7 @@
 	  int, int, int, int, char *, int, char *, double);
 int dareatheme(struct Map_info *, struct cat_list *, dbCatValArray *,
 	       double *, int, const struct color_rgb *,
-	       const struct color_rgb *, int, struct Cell_head *, int, int *,
-	       int);
+	       const struct color_rgb *, int, struct Cell_head *, int);
 void plot_polygon(double *, double *, int);
 void plot_polyline(double *, double *, int);
 

Modified: grass/branches/develbranch_6/display/d.thematic.area/main.c
===================================================================
--- grass/branches/develbranch_6/display/d.thematic.area/main.c	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/display/d.thematic.area/main.c	2008-08-13 09:30:59 UTC (rev 32738)
@@ -25,6 +25,7 @@
 #include <grass/colors.h>
 #include <grass/dbmi.h>
 #include <grass/glocale.h>
+#include <grass/arraystats.h>
 #include "plot.h"
 #include "local_proto.h"
 
@@ -34,7 +35,7 @@
     char *mapset;
     int ret, level;
     int i, stat = 0;
-    int nclass, nbreaks, *frequencies;
+    int nclass = 0, nbreaks, *frequencies, boxsize, textsize, ypos;
     int chcat = 0;
     int r, g, b;
     int has_color = 0;
@@ -46,16 +47,19 @@
     struct Option *map_opt;
     struct Option *column_opt;
     struct Option *breaks_opt;
+    struct Option *algo_opt;
+    struct Option *nbclass_opt;
     struct Option *colors_opt;
     struct Option *bcolor_opt;
     struct Option *bwidth_opt;
     struct Option *where_opt;
     struct Option *field_opt;
     struct Option *render_opt;
-    struct Flag *legend_flag, *x_flag, *nodraw_flag;
+    struct Option *legend_file_opt;
+    struct Flag *legend_flag, *algoinfo_flag, *nodraw_flag;
 
     struct cat_list *Clist;
-    int *cats, ncat, nrec;
+    int *cats, ncat, nrec, ctype;
     struct Map_info Map;
     struct field_info *fi;
     dbDriver *driver;
@@ -63,7 +67,10 @@
     dbCatValArray cvarr;
     struct Cell_head window;
     BOUND_BOX box;
-    double overlap, *breakpoints, min, max;
+    double overlap, *breakpoints, min = 0, max = 0, *data = NULL, class_info =
+	0.0;
+    struct GASTATS stats;
+    FILE *fd;
 
     /* Initialize the GIS calls */
     G_gisinit(argv[0]);
@@ -86,17 +93,38 @@
     breaks_opt = G_define_option();
     breaks_opt->key = "breaks";
     breaks_opt->type = TYPE_STRING;
-    breaks_opt->required = YES;
+    breaks_opt->required = NO;
     breaks_opt->multiple = YES;
     breaks_opt->description = _("Class breaks, without minimum and maximum");
 
+    algo_opt = G_define_option();
+    algo_opt->key = "algorithm";
+    algo_opt->type = TYPE_STRING;
+    algo_opt->required = NO;
+    algo_opt->multiple = NO;
+    algo_opt->options = "int,std,qua,equ,dis";
+    algo_opt->description = _("Algorithm to use for classification");
+    algo_opt->descriptions = _("int;simple intervals;"
+			       "std;standard deviations;"
+			       "qua;quantiles;"
+			       "equ;equiprobable (normal distribution);");
+/*currently disabled because of bugs       "dis;discontinuities");*/
+
+    nbclass_opt = G_define_option();
+    nbclass_opt->key = "nbclasses";
+    nbclass_opt->type = TYPE_INTEGER;
+    nbclass_opt->required = NO;
+    nbclass_opt->multiple = NO;
+    nbclass_opt->description = _("Number of classes to define");
+
     colors_opt = G_define_option();
     colors_opt->key = "colors";
     colors_opt->type = TYPE_STRING;
     colors_opt->required = YES;
     colors_opt->multiple = YES;
-    colors_opt->description = _("Colors (one more than class breaks).");
-    colors_opt->gisprompt = GISPROMPT_COLOR;
+    colors_opt->description = _("Colors (one per class).");
+    /* This won't work. We would need multiple color prompt.
+     * colors_opt->gisprompt = GISPROMPT_COLOR; */
 
     field_opt = G_define_standard_option(G_OPT_V_FIELD);
     field_opt->description =
@@ -126,50 +154,47 @@
     render_opt->required = NO;
     render_opt->multiple = NO;
     render_opt->answer = "l";
-    render_opt->options = "g,r,d,c,l";
+    render_opt->options = "d,c,l";
     render_opt->description = _("Rendering method for filled polygons");
     render_opt->descriptions =
-	_("g;use the libgis render functions (features: clipping);"
-	  "r;use the raster graphics library functions (features: polylines);"
-	  "d;use the display library basic functions (features: polylines);"
+	_("d;use the display library basic functions (features: polylines);"
 	  "c;use the display library clipping functions (features: clipping);"
 	  "l;use the display library culling functions (features: culling, polylines)");
 
 
+    legend_file_opt = G_define_standard_option(G_OPT_F_OUTPUT);
+    legend_file_opt->key = "legendfile";
+    legend_file_opt->description =
+	_("File in which to save d.graph instructions for legend display");
+    legend_file_opt->required = NO;
 
     legend_flag = G_define_flag();
     legend_flag->key = 'l';
     legend_flag->description =
 	_("Create legend information and send to stdout");
 
+    algoinfo_flag = G_define_flag();
+    algoinfo_flag->key = 'e';
+    algoinfo_flag->description =
+	_("When printing legend info , include extended statistical info from classification algorithm");
+
     nodraw_flag = G_define_flag();
     nodraw_flag->key = 'n';
     nodraw_flag->description = _("Do not draw map, only output the legend");
 
-    x_flag = G_define_flag();
-    x_flag->key = 'x';
-    x_flag->description =
-	_("Don't add to list of vectors and commands in monitor "
-	  "(it won't be drawn if the monitor is refreshed)");
-
     /* Check command line */
     if (G_parser(argc, argv))
 	exit(EXIT_FAILURE);
 
-    if (G_strcasecmp(render_opt->answer, "g") == 0)
-	render = RENDER_GPP;
-    else if (G_strcasecmp(render_opt->answer, "r") == 0)
-	render = RENDER_RPA;
-    else if (G_strcasecmp(render_opt->answer, "d") == 0)
+    if (G_strcasecmp(render_opt->answer, "d") == 0)
 	render = RENDER_DP;
     else if (G_strcasecmp(render_opt->answer, "c") == 0)
 	render = RENDER_DPC;
     else if (G_strcasecmp(render_opt->answer, "l") == 0)
 	render = RENDER_DPL;
     else
-	render = RENDER_GPP;
+	G_fatal_error(_("Invalid rendering method <%s>"), render_opt->answer);
 
-
     if (G_verbose() > G_verbose_std())
 	verbose = TRUE;
 
@@ -177,7 +202,7 @@
 
     /* Read map options */
 
-    G_strcpy(map_name, map_opt->answer);
+    strcpy(map_name, map_opt->answer);
 
 
     /* Make sure map is available */
@@ -216,7 +241,8 @@
     /*Get CatValArray needed for plotting and for legend calculations */
     db_CatValArray_init(&cvarr);
     nrec = db_select_CatValArray(driver, fi->table, fi->key,
-				 column_opt->answer, NULL, &cvarr);
+				 column_opt->answer, where_opt->answer,
+				 &cvarr);
 
 
     G_debug(3, "nrec (%s) = %d", column_opt->answer, nrec);
@@ -229,8 +255,6 @@
 	G_fatal_error(_("Cannot select data (%s) from table"),
 		      column_opt->answer);
 
-    G_debug(2, "\n%d records selected from table", nrec);
-
     for (i = 0; i < cvarr.n_values; i++) {
 	G_debug(4, "cat = %d  %s = %d", cvarr.value[i].cat,
 		column_opt->answer,
@@ -272,24 +296,92 @@
 	G_fatal_error(_("Unknown color: [%s]"), bcolor_opt->answer);
     }
 
-    /*Get class breaks */
-    nbreaks = 0;
-    while (breaks_opt->answers[nbreaks] != NULL)
-	nbreaks++;
-    nclass = nbreaks + 1;	/*add one since breaks do not include min and max values */
-    G_debug(3, "nclass = %d", nclass);
 
-    breakpoints = (double *)G_malloc((nbreaks) * sizeof(double));
-    for (i = 0; i < nbreaks; i++)
-	breakpoints[i] = atof(breaks_opt->answers[i]);
+    /* if both class breaks and (algorithm or classnumber) are given, give precedence to class 
+     * breaks
+     */
 
-    frequencies = (int *)G_malloc((nbreaks + 1) * sizeof(int));
-    for (i = 0; i < nbreaks + 1; i++)
-	frequencies[i] = 0;
+    if (breaks_opt->answers) {
 
+	if (algo_opt->answer || nbclass_opt->answer)
+	    G_warning(_("You gave both manual breaks and a classification algorithm or a number of classes. The manual breaks have precedence and will thus be used."));
+
+
+	/*Get class breaks */
+	nbreaks = 0;
+	while (breaks_opt->answers[nbreaks] != NULL)
+	    nbreaks++;
+	nclass = nbreaks + 1;	/*add one since breaks do not include min and max values */
+	G_debug(3, "nclass = %d", nclass);
+
+	breakpoints = (double *)G_malloc((nbreaks) * sizeof(double));
+	for (i = 0; i < nbreaks; i++)
+	    breakpoints[i] = atof(breaks_opt->answers[i]);
+
+    }
+    else {
+
+	if (algo_opt->answer && nbclass_opt->answer) {
+
+	    ret = db_CatValArray_sort_by_value(&cvarr);
+	    if (ret == DB_FAILED)
+		G_fatal_error("Could not sort array of values..");
+
+
+	    data = (double *)G_malloc((nrec) * sizeof(double));
+	    for (i = 0; i < nrec; i++)
+		data[i] = 0.0;
+
+	    ctype = cvarr.ctype;
+	    if (ctype == DB_C_TYPE_INT) {
+		for (i = 0; i < nrec; i++)
+		    data[i] = cvarr.value[i].val.i;
+	    }
+	    else {
+		for (i = 0; i < nrec; i++)
+		    data[i] = cvarr.value[i].val.d;
+	    }
+
+
+	    /* min and max are needed later for the legend */
+	    if (cvarr.ctype == DB_C_TYPE_INT) {
+		min = cvarr.value[0].val.i;
+		max = cvarr.value[cvarr.n_values - 1].val.i;
+	    }
+	    else {
+		min = cvarr.value[0].val.d;
+		max = cvarr.value[cvarr.n_values - 1].val.d;
+	    }
+
+	    db_CatValArray_sort(&cvarr);
+
+	    nclass = atoi(nbclass_opt->answer);
+	    nbreaks = nclass - 1;	/* we need one less classbreaks (min and 
+					 * max exluded) than classes */
+
+	    breakpoints = (double *)G_malloc((nbreaks) * sizeof(double));
+	    for (i = 0; i < nbreaks; i++)
+		breakpoints[i] = 0;
+
+	    /* Get classbreaks for given algorithm and number of classbreaks.
+	     * class_info takes any info coming from the classification algorithms */
+	    class_info =
+		class_apply_algorithm(algo_opt->answer, data, nrec, &nbreaks,
+				      breakpoints);
+
+	}
+	else {
+
+	    G_fatal_error(_("You must either give classbreaks or a classification algorithm"));
+
+	}
+    };
+
+
     /* Fill colors */
     colors = (struct color_rgb *)G_malloc(nclass * sizeof(struct color_rgb));
 
+
     if (colors_opt->answers != NULL) {
 	for (i = 0; i < nclass; i++) {
 	    if (colors_opt->answers[i] == NULL)
@@ -308,80 +400,91 @@
     }
 
 
+    if (!nodraw_flag->answer) {
+	/* Now's let's prepare the actual plotting */
+	if (R_open_driver() != 0)
+	    G_fatal_error(_("No graphics device selected"));
 
-    /* Now's let's prepare the actual plotting */
-    if (R_open_driver() != 0)
-	G_fatal_error(_("No graphics device selected"));
+	D_setup(0);
 
-    D_setup(0);
+	if (verbose)
+	    G_message(_("Plotting ..."));
 
-    G_setup_plot(D_get_d_north(), D_get_d_south(),
-		 D_get_d_west(), D_get_d_east(), D_move_abs, D_cont_abs);
+	Vect_get_map_box(&Map, &box);
 
-    if (verbose)
-	G_message(_("Plotting ..."));
+	if (window.north < box.S || window.south > box.N ||
+	    window.east < box.W ||
+	    window.west > G_adjust_easting(box.E, &window)) {
+	    G_message(_("The bounding box of the map is outside the current region, "
+		       "nothing drawn."));
+	    stat = 0;
+	}
+	else {
+	    overlap =
+		G_window_percentage_overlap(&window, box.N, box.S, box.E,
+					    box.W);
+	    G_debug(1, "overlap = %f \n", overlap);
+	    if (overlap < 1)
+		Vect_set_constraint_region(&Map, window.north, window.south,
+					   window.east, window.west,
+					   PORT_DOUBLE_MAX, -PORT_DOUBLE_MAX);
 
-    Vect_get_map_box(&Map, &box);
+	    /* default line width */
+	    D_line_width(default_width);
 
-    if (window.north < box.S || window.south > box.N ||
-	window.east < box.W ||
-	window.west > G_adjust_easting(box.E, &window)) {
-	G_message(_("The bounding box of the map is outside the current region, "
-		   "nothing drawn."));
-	stat = 0;
-    }
-    else {
-	overlap =
-	    G_window_percentage_overlap(&window, box.N, box.S, box.E, box.W);
-	G_debug(1, "overlap = %f \n", overlap);
-	if (overlap < 1)
-	    Vect_set_constraint_region(&Map, window.north, window.south,
-				       window.east, window.west,
-				       PORT_DOUBLE_MAX, -PORT_DOUBLE_MAX);
 
-	/* default line width */
-	D_line_width(default_width);
+	    stat =
+		dareatheme(&Map, Clist, &cvarr, breakpoints, nbreaks, colors,
+			   has_color ? &bcolor : NULL, chcat, &window,
+			   default_width);
 
 
-	stat = dareatheme(&Map, Clist, &cvarr, breakpoints, nbreaks, colors,
-			  has_color ? &bcolor : NULL, chcat, &window,
-			  default_width, frequencies, nodraw_flag->answer);
+	    /* reset line width: Do we need to get line width from display
+	     * driver (not implemented)?  It will help restore previous line
+	     * width (not just 0) determined by another module (e.g.,
+	     * d.linewidth). */
+	    R_line_width(0);
 
+	}			/* end window check if */
 
-	/* reset line width: Do we need to get line width from display
-	 * driver (not implemented)?  It will help restore previous line
-	 * width (not just 0) determined by another module (e.g.,
-	 * d.linewidth). */
-	R_line_width(0);
+	R_close_driver();
 
-    }				/* end window check if */
+    }				/* end of nodraw_flag condition */
 
-    if (!x_flag->answer) {
-	D_add_to_list(G_recreate_command());
+    frequencies = (int *)G_malloc((nbreaks + 1) * sizeof(int));
+    for (i = 0; i < nbreaks + 1; i++)
+	frequencies[i] = 0.0;
+    class_frequencies(data, nrec, nbreaks, breakpoints, frequencies);
 
-	D_set_dig_name(G_fully_qualified_name(map_name, mapset));
-	D_add_to_dig_list(G_fully_qualified_name(map_name, mapset));
-    }
+    /*Get basic statistics about the data*/
+    basic_stats(data, nrec, &stats);
 
-    R_close_driver();
+    if (legend_flag->answer) {
 
+	if (algoinfo_flag->answer) {
 
 
-    if (legend_flag->answer) {
+	    fprintf(stdout, _("\nTotal number of records: %.0f\n"),
+		    stats.count);
+	    fprintf(stdout, _("Classification of %s into %i classes\n"),
+		    column_opt->answer, nbreaks + 1);
+	    fprintf(stdout, _("Using algorithm: *** %s ***\n"),
+		    algo_opt->answer);
+	    fprintf(stdout, _("Mean: %f\tStandard deviation = %f\n"),
+		    stats.mean, stats.stdev);
 
-	ret = db_CatValArray_sort_by_value(&cvarr);
-	if (cvarr.ctype == DB_C_TYPE_INT) {
-	    min = cvarr.value[0].val.i;
-	    max = cvarr.value[cvarr.n_values - 1].val.i;
+	    if (G_strcasecmp(algo_opt->answer, "dis") == 0)
+		fprintf(stdout, _("Last chi2 = %f\n"), class_info);
+	    if (G_strcasecmp(algo_opt->answer, "std") == 0)
+		fprintf(stdout,
+			_("Stdev multiplied by %.4f to define step\n"),
+			class_info);
+	    fprintf(stdout, "\n");
+
 	}
-	else {
-	    min = cvarr.value[0].val.d;
-	    max = cvarr.value[cvarr.n_values - 1].val.d;
-	}
 
-
 	fprintf(stdout, "%f|%f|%i|%d:%d:%d\n",
-		min, breakpoints[0], frequencies[0], colors[0].r, colors[0].g,
+		stats.min, breakpoints[0], frequencies[0], colors[0].r, colors[0].g,
 		colors[0].b);
 
 	for (i = 1; i < nbreaks; i++) {
@@ -390,12 +493,41 @@
 		    colors[i].r, colors[i].g, colors[i].b);
 	}
 	fprintf(stdout, "%f|%f|%i|%d:%d:%d\n",
-		breakpoints[nbreaks - 1], max, frequencies[nbreaks],
-		colors[0].r, colors[0].g, colors[0].b);
+		breakpoints[nbreaks - 1], stats.max, frequencies[nbreaks],
+		colors[nbreaks].r, colors[nbreaks].g, colors[nbreaks].b);
     }
 
+    if (legend_file_opt->answer) {
+	fd = fopen(legend_file_opt->answer, "w");
+	boxsize = 25;
+	textsize = boxsize / 10;
+	fprintf(fd, "size %i %i\n", textsize, textsize);
+	ypos = 10;
+	fprintf(fd, "symbol basic/box %i 5 %i black %d:%d:%d\n", boxsize,
+		ypos, colors[0].r, colors[0].g, colors[0].b);
+	fprintf(fd, "move 8 %f \n", ypos - textsize / 2.5);
+	fprintf(fd, "text %f - %f | %i\n", min, breakpoints[0],
+		frequencies[0]);
+	for (i = 1; i < nbreaks; i++) {
+	    ypos = 10 + i * 6;
+	    fprintf(fd, "symbol basic/box %i 5 %i black %d:%d:%d\n", boxsize,
+		    ypos, colors[i].r, colors[i].g, colors[i].b);
+	    fprintf(fd, "move 8 %f\n", ypos - textsize / 2.5);
+	    fprintf(fd, "text %f - %f | %i\n", breakpoints[i - 1],
+		    breakpoints[i], frequencies[i]);
+	}
+	ypos = 10 + i * 6;
+	fprintf(fd, "symbol basic/box %i 5 %i black %d:%d:%d\n", boxsize,
+		ypos, colors[nbreaks].r, colors[nbreaks].g,
+		colors[nbreaks].b);
+	fprintf(fd, "move 8 %f\n", ypos - textsize / 2.5);
+	fprintf(fd, "text %f - %f | %i\n", breakpoints[nbreaks - 1], max,
+		frequencies[nbreaks]);
+	fclose(fd);
+    }
+
     if (verbose)
-	G_done_msg("");
+	G_done_msg(" ");
 
     Vect_close(&Map);
     Vect_destroy_cat_list(Clist);

Modified: grass/branches/develbranch_6/include/arraystats.h
===================================================================
--- grass/branches/develbranch_6/include/arraystats.h	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/include/arraystats.h	2008-08-13 09:30:59 UTC (rev 32738)
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
+#include <grass/gis.h>
 #include <grass/glocale.h>
 #include <grass/dbmi.h>
 
@@ -11,12 +12,14 @@
     double max;
     double sum;
     double sumsq;
+    double sumabs;
     double mean;
+    double meanabs;
     double var;
     double stdev;
 };
 
-
+double class_apply_algorithm(char *, double *, int, int *, double *);
 int class_interval(double *, int, int, double *);
 int class_quant(double *, int, int, double *);
 double class_discont(double *, int, int, double *);

Modified: grass/branches/develbranch_6/lib/arraystats/basic.c
===================================================================
--- grass/branches/develbranch_6/lib/arraystats/basic.c	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/lib/arraystats/basic.c	2008-08-13 09:30:59 UTC (rev 32738)
@@ -1,3 +1,4 @@
+#include <math.h>
 #include <grass/arraystats.h>
 
 
@@ -5,7 +6,7 @@
 void basic_stats(double *data, int count, struct GASTATS *stats)
 {
     int i = 1;
-    double sum = 0, sumsq = 0;
+    double sum = 0, sumsq = 0, sumabs = 0;
     double dev = 0, dev2 = 0;
 
     stats->count = count;
@@ -14,12 +15,15 @@
 
     for (i = 0; i < count; i++) {
 	sum += data[i];
+	sumabs += fabs(data[i]);
 	sumsq += data[i] * data[i];
     }
     stats->sum = sum;
+    stats->sumabs = sumabs;
     stats->sumsq = sumsq;
 
     stats->mean = stats->sum / stats->count;
+    stats->meanabs = stats->sumabs / stats->count;
     for (i = 0; i < count; i++) {
 	dev2 = dev2 + (data[i] - stats->mean) * (data[i] - stats->mean);
 	dev = dev + (data[i] - stats->mean);

Modified: grass/branches/develbranch_6/lib/arraystats/class.c
===================================================================
--- grass/branches/develbranch_6/lib/arraystats/class.c	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/lib/arraystats/class.c	2008-08-13 09:30:59 UTC (rev 32738)
@@ -1,7 +1,33 @@
 /* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
 
+#include <grass/glocale.h>
 #include <grass/arraystats.h>
 
+double class_apply_algorithm(char *algo, double *data, int nrec, int *nbreaks,
+			     double *classbreaks)
+{
+    double finfo = 0.0;
+
+    if (G_strcasecmp(algo, "int") == 0)
+	finfo = class_interval(data, nrec, *nbreaks, classbreaks);
+    else if (G_strcasecmp(algo, "std") == 0)
+	finfo = class_stdev(data, nrec, *nbreaks, classbreaks);
+    else if (G_strcasecmp(algo, "qua") == 0)
+	finfo = class_quant(data, nrec, *nbreaks, classbreaks);
+    else if (G_strcasecmp(algo, "equ") == 0)
+	finfo = class_equiprob(data, nrec, nbreaks, classbreaks);
+    else if (G_strcasecmp(algo, "dis") == 0)
+	    /*	finfo = class_discont(data, nrec, *nbreaks, classbreaks); disabled because of bugs */
+        G_fatal_error(_("Discont algorithm currently not available because of bugs"));
+    else
+	G_fatal_error(_("%s: Unknown algorithm"), algo);
+
+    if (finfo == 0)
+	G_fatal_error(_("%s: Error in classification algorithm"), algo);
+
+    return finfo;
+}
+
 int class_interval(double *data, int count, int nbreaks, double *classbreaks)
 {
     double min, max;
@@ -214,11 +240,11 @@
     return (1);
 }
 
-
+/* FixMe: there seems to a problem with array overflow, probably due to the fact that the code was ported from fortran which has 1-based arrays*/
 double class_discont(double *data, int count, int nbreaks,
 		     double *classbreaks)
 {
-    int *num, nbclass, maxclass = 0;
+    int *num, nbclass;
     double *no, *zz, *nz, *xn, *co;
     double *x;			//Vecteur des observations standardisées
     int i, j, k;
@@ -233,7 +259,7 @@
     int im = 0, ji = 0;
     int tmp = 0;
     int nff = 0, jj = 0, no1 = 0, no2 = 0;
-    double f = 0, xt1 = 0, xt2 = 0, chi2 = 0, xnj_1 = 0, xj_1 = 0;
+    double f = 0, xt1 = 0, xt2 = 0, chi2 = 1000.0, xnj_1 = 0, xj_1 = 0;
 
 
     /*get the number of values */
@@ -382,16 +408,10 @@
 	    xt2 -= xt1;
 	}
 
-	/*if new class break not statistically significant (alpha > 0.05), give warning */
-	if (maxclass == 0) {
+	/* calculate chi-square to indicate statistical significance of new class, i.e. how probable would it be that the new class could be the result of purely random choice */
+	if (chi2 > pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2))
 	    chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
-	    if (chi2 < 3.84148) {
 
-		G_warning(_("discontinuities algorithm: %i class breaks or more are not statistically significant at alpha=0.05"),
-			  i);
-		maxclass = 1;
-	    }
-	}
     }
 
     /*  Fill up classbreaks of i <=nbclass classes */

Modified: grass/branches/develbranch_6/vector/v.class/main.c
===================================================================
--- grass/branches/develbranch_6/vector/v.class/main.c	2008-08-13 09:13:40 UTC (rev 32737)
+++ grass/branches/develbranch_6/vector/v.class/main.c	2008-08-13 09:30:59 UTC (rev 32738)
@@ -69,8 +69,8 @@
     algo_opt->descriptions = _("int;simple intervals;"
 			       "std;standard deviations;"
 			       "qua;quantiles;"
-			       "equ;equiprobable (normal distribution);"
-			       "dis;discontinuities");
+			       "equ;equiprobable (normal distribution);");
+/*			       "dis;discontinuities"); currently disabled because of bugs*/
 
     nbclass_opt = G_define_option();
     nbclass_opt->key = "nbclasses";
@@ -100,16 +100,18 @@
     /* Read attributes */
     db_CatValArray_init(&Cvarr);
     Fi = Vect_get_field(&Map, ofield);
+
     if (Fi == NULL) {
 	G_fatal_error(_("Unable to get layer info for vector map"));
     }
+    Vect_close(&Map);
 
     Driver = db_start_driver_open_database(Fi->driver, Fi->database);
     if (Driver == NULL)
 	G_fatal_error("Unable to open database <%s> by driver <%s>",
 		      Fi->database, Fi->driver);
 
-    /* Note do not check if the column exists in the table because it may be an expression */
+    /* Note: do not check if the column exists in the table because it may be an expression */
 
     nrec =
 	db_select_CatValArray(Driver, Fi->table, Fi->key, col_opt->answer,
@@ -131,6 +133,9 @@
 
 
     data = (double *)G_malloc((nrec) * sizeof(double));
+    for (i = 0; i < nrec; i++)
+	data[i] = 0.0;
+
     if (ctype == DB_C_TYPE_INT) {
 	for (i = 0; i < nrec; i++)
 	    data[i] = Cvarr.value[i].val.i;
@@ -149,26 +154,17 @@
     for (i = 0; i < nbreaks; i++)
 	classbreaks[i] = 0;
 
+    /* Get classbreaks for given algorithm and number of classbreaks.
+     * finfo takes any info coming from the classification algorithms
+     * equ algorithm can alter number of class breaks */
+    finfo =
+	class_apply_algorithm(algo_opt->answer, data, nrec, &nbreaks,
+			      classbreaks);
 
-    if (G_strcasecmp(algo_opt->answer, "int") == 0)
-	finfo = class_interval(data, nrec, nbreaks, classbreaks);
-    else if (G_strcasecmp(algo_opt->answer, "std") == 0)
-	finfo = class_stdev(data, nrec, nbreaks, classbreaks);
-    else if (G_strcasecmp(algo_opt->answer, "qua") == 0)
-	finfo = class_quant(data, nrec, nbreaks, classbreaks);
-    else if (G_strcasecmp(algo_opt->answer, "equ") == 0)
-	finfo = class_equiprob(data, nrec, &nbreaks, classbreaks);
-    else if (G_strcasecmp(algo_opt->answer, "dis") == 0)
-	finfo = class_discont(data, nrec, nbreaks, classbreaks);
-    else
-	G_fatal_error("%s: Unknown algorithm", algo_opt->answer);
 
-    if (finfo == 0)
-	G_fatal_error(_("%s: Error in classification algorithm"),
-		      algo_opt->answer);
+    if (G_strcasecmp(algo_opt->answer, "dis") == 0 && finfo < 3.84148)
+	G_warning(_("The discontinuities algorithm indicates that some class breaks are not statistically significant at alpha=0.05. You are advised to reduce the number of classes."));
 
-
-
     /*output to be piped to other modules ? */
     if (shell_flag->answer) {
 
@@ -191,15 +187,18 @@
 	min = data[0];
 	max = data[nrec - 1];
 
-
+	/* as equ algorithm can modify number of breaks we recalculate number of
+	 * classes
+	 */
 	fprintf(stdout, _("\nClassification of %s into %i classes\n"),
-		col_opt->answer, nbclass);
+		col_opt->answer, nbreaks + 1);
 	fprintf(stdout, _("Using algorithm: *** %s ***\n"), algo_opt->answer);
 	fprintf(stdout, _("Mean: %f\tStandard deviation = %f\n"), stats.mean,
 		stats.stdev);
 
-	if (G_strcasecmp(algo_opt->answer, "dis") == 0)
-	    fprintf(stdout, _("Last chi2 = %f\n"), finfo);
+	if (G_strcasecmp(algo_opt->answer, "dis") == 0) {
+	    fprintf(stdout, _("Lowest chi2 = %f\n"), finfo);
+	}
 	if (G_strcasecmp(algo_opt->answer, "std") == 0)
 	    fprintf(stdout, _("Stdev multiplied by %.4f to define step\n"),
 		    finfo);
@@ -220,9 +219,9 @@
     }
 
 
+
     fflush(stdout);
 
-    Vect_close(&Map);
 
     exit(EXIT_SUCCESS);
 }



More information about the grass-commit mailing list