[GRASS-SVN] r30282 - in grass/trunk/lib: . arraystats

svn_grass at osgeo.org svn_grass at osgeo.org
Fri Feb 22 05:17:32 EST 2008


Author: mlennert
Date: 2008-02-22 05:17:32 -0500 (Fri, 22 Feb 2008)
New Revision: 30282

Added:
   grass/trunk/lib/arraystats/
   grass/trunk/lib/arraystats/Makefile
   grass/trunk/lib/arraystats/basic.c
   grass/trunk/lib/arraystats/class.c
Log:
Beginning of a library of statstics for arrays of doubles


Added: grass/trunk/lib/arraystats/Makefile
===================================================================
--- grass/trunk/lib/arraystats/Makefile	                        (rev 0)
+++ grass/trunk/lib/arraystats/Makefile	2008-02-22 10:17:32 UTC (rev 30282)
@@ -0,0 +1,10 @@
+
+MODULE_TOPDIR = ../..
+
+EXTRA_LIBS=$(GISLIB)
+
+LIB_NAME = $(ARRAYSTATS_LIBNAME)
+
+include $(MODULE_TOPDIR)/include/Make/Lib.make
+
+default: lib

Added: grass/trunk/lib/arraystats/basic.c
===================================================================
--- grass/trunk/lib/arraystats/basic.c	                        (rev 0)
+++ grass/trunk/lib/arraystats/basic.c	2008-02-22 10:17:32 UTC (rev 30282)
@@ -0,0 +1,63 @@
+#include <grass/arraystats.h>
+
+
+/*provides basic univar stats */
+void basic_stats(double *data, int count, struct GASTATS *stats)
+{
+    int i = 1;
+    double sum = 0, sumsq = 0;
+    double dev = 0, dev2 = 0;
+
+    stats->count = count;
+    stats->min = data[0];
+    stats->max = data[count - 1];
+
+    for (i = 0; i < count; i++) {
+	sum += data[i];
+	sumsq += data[i] * data[i];
+    }
+    stats->sum = sum;
+    stats->sumsq = sumsq;
+
+    stats->mean = stats->sum / stats->count;
+    for (i = 0; i < count; i++) {
+	dev2 = dev2 + (data[i] - stats->mean) * (data[i] - stats->mean);
+	dev = dev + (data[i] - stats->mean);
+    }
+
+
+    stats->var = (dev2 - (dev * dev / stats->count)) / stats->count;
+    stats->stdev = sqrt(stats->var);
+
+    return;
+}
+
+
+void eqdrt(double vectx[], double vecty[], int i1, int i2, double *vabc)
+{
+    double bn = 0, bd = 0, x1 = 0, y1 = 0;
+    vabc[0] = 0;
+    vabc[1] = 0;
+    vabc[2] = 0;
+    if (i1 == 0) {
+	x1 = 0;
+	y1 = 0;
+    }
+    else {
+	x1 = vectx[i1];
+	y1 = vecty[i1];
+    }
+    bn = y1 - vecty[i2];
+    bd = x1 - vectx[i2];
+    if (bd != 0) {
+	vabc[1] = bn / bd;
+	vabc[0] = y1 - vabc[1] * x1;
+	return;
+    }
+    if (bn != 0)
+	vabc[2] = x1;
+    else
+	G_debug(3, "Points are equal\n");
+    return;
+}
+

Added: grass/trunk/lib/arraystats/class.c
===================================================================
--- grass/trunk/lib/arraystats/class.c	                        (rev 0)
+++ grass/trunk/lib/arraystats/class.c	2008-02-22 10:17:32 UTC (rev 30282)
@@ -0,0 +1,428 @@
+/* functions to classify sorted arrays of doubles and fill a vector of classbreaks */
+
+#include <grass/arraystats.h>
+
+int class_interval(double *data, int count, int nbreaks, double *classbreaks)
+{
+    double min, max;
+    double step;
+    int i = 0;
+
+    min = data[0];
+    max = data[count - 1];
+
+    step = (max - min) / (nbreaks + 1);
+
+    for (i = 0; i < nbreaks; i++)
+	classbreaks[i] = min + (step * (i + 1));
+
+    return (1);
+}
+
+double class_stdev(double *data, int count, int nbreaks, double *classbreaks)
+{
+    struct GASTATS stats;
+    int i;
+    int nbclass;
+    double scale = 1.0;
+
+    basic_stats(data, count, &stats);
+
+    nbclass = nbreaks + 1;
+
+    if (nbclass % 2 == 1) {	/* number of classes is uneven so we center middle class on mean */
+
+	/* find appropriate fraction of stdev for step */
+	i = 1;
+	while (i) {
+	    if (((stats.mean + stats.stdev * scale / 2) +
+		 (stats.stdev * scale * (nbclass / 2 - 1)) > stats.max) ||
+		((stats.mean - stats.stdev * scale / 2) -
+		 (stats.stdev * scale * (nbclass / 2 - 1)) < stats.min))
+		scale = scale / 2;
+	    else
+		i = 0;
+	}
+
+	/* classbreaks below the mean */
+	for (i = 0; i < nbreaks / 2; i++)
+	    classbreaks[i] =
+		(stats.mean - stats.stdev * scale / 2) -
+		stats.stdev * scale * (nbreaks / 2 - (i + 1));
+	/* classbreaks above the mean */
+	for (i = i; i < nbreaks; i++)
+	    classbreaks[i] =
+		(stats.mean + stats.stdev * scale / 2) +
+		stats.stdev * scale * (i - nbreaks / 2);
+
+    }
+    else {			/* number of classes is even so mean is a classbreak */
+
+	/* decide whether to use 1*stdev or 0.5*stdev as step */
+	i = 1;
+	while (i) {
+	    if (((stats.mean) + (stats.stdev * scale * (nbclass / 2 - 1)) >
+		 stats.max) ||
+		((stats.mean) - (stats.stdev * scale * (nbclass / 2 - 1)) <
+		 stats.min))
+		scale = scale / 2;
+	    else
+		i = 0;
+	}
+
+	/* classbreaks below the mean and on the mean */
+	for (i = 0; i <= nbreaks / 2; i++)
+	    classbreaks[i] =
+		stats.mean - stats.stdev * scale * (nbreaks / 2 - i);
+	/* classbreaks above the mean */
+	for (i = i; i < nbreaks; i++)
+	    classbreaks[i] =
+		stats.mean + stats.stdev * scale * (i - nbreaks / 2);
+    }
+
+    return (scale);
+}
+
+int class_quant(double *data, int count, int nbreaks, double *classbreaks)
+{
+    int i, step;
+
+    step = count / (nbreaks + 1);
+
+    for (i = 0; i < nbreaks; i++)
+	classbreaks[i] = data[step * (i + 1)];
+
+    return (1);
+}
+
+
+int class_equiprob(double *data, int count, int *nbreaks, double *classbreaks)
+{
+    int i, j;
+    double *lequi;		/*Vector of scale factors for probabilities of the normal distribution */
+    struct GASTATS stats;
+    int nbclass;
+
+    nbclass = *nbreaks + 1;
+
+    lequi = G_malloc(*nbreaks * sizeof(double));
+
+    /*The following values come from the normal distribution and 
+     * will be used as: 
+     * classbreak[i] = (lequi[i] * stdev) + mean;
+     */
+
+    if (nbclass < 3) {
+	lequi[0] = 0;
+    }
+    else if (nbclass == 3) {
+	lequi[0] = -0.43076;
+	lequi[1] = 0.43076;
+    }
+    else if (nbclass == 4) {
+	lequi[0] = -0.6745;
+	lequi[1] = 0;
+	lequi[2] = 0.6745;
+    }
+    else if (nbclass == 5) {
+	lequi[0] = -0.8416;
+	lequi[1] = -0.2533;
+	lequi[2] = 0.2533;
+	lequi[3] = 0.8416;
+    }
+    else if (nbclass == 6) {
+	lequi[0] = -0.9676;
+	lequi[1] = -0.43076;
+	lequi[2] = 0;
+	lequi[3] = 0.43076;
+	lequi[4] = 0.9676;
+    }
+    else if (nbclass == 7) {
+	lequi[0] = -1.068;
+	lequi[1] = -0.566;
+	lequi[2] = -0.18;
+	lequi[3] = 0.18;
+	lequi[4] = 0.566;
+	lequi[5] = 1.068;
+    }
+    else if (nbclass == 8) {
+	lequi[0] = -1.1507;
+	lequi[1] = -0.6745;
+	lequi[2] = -0.3187;
+	lequi[3] = 0;
+	lequi[4] = 0.3187;
+	lequi[5] = 0.6745;
+	lequi[6] = 1.1507;
+    }
+    else if (nbclass == 9) {
+	lequi[0] = -1.2208;
+	lequi[1] = -0.7648;
+	lequi[2] = -0.4385;
+	lequi[3] = -0.1397;
+	lequi[4] = 0.1397;
+	lequi[5] = 0.4385;
+	lequi[6] = 0.7648;
+	lequi[7] = 1.2208;
+    }
+    else if (nbclass == 10) {
+	lequi[0] = -1.28155;
+	lequi[1] = -0.84162;
+	lequi[2] = -0.5244;
+	lequi[3] = -0.25335;
+	lequi[4] = 0;
+	lequi[5] = 0.25335;
+	lequi[6] = 0.5244;
+	lequi[7] = 0.84162;
+	lequi[8] = 1.28155;
+    }
+    else {
+	G_fatal_error
+	    ("Equiprobable classbreaks currently limited to 10 classes");
+    }
+
+    basic_stats(data, count, &stats);
+
+    /*check if any of the classbreaks would fall outside of the range min-max */
+    j = 0;
+    for (i = 0; i < *nbreaks; i++) {
+	if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
+	    (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
+	    j++;
+	}
+    }
+
+    if (j < (*nbreaks)) {
+	G_warning(_
+		  ("There are classbreaks outside the range min-max. Number of classes reduced to %i, but using probabilities for %i classes."),
+		  j + 1, *nbreaks + 1);
+	G_realloc(classbreaks, j * sizeof(double));
+	for (i = 0; i < j; i++)
+	    classbreaks[i] = 0;
+    }
+
+    j = 0;
+    for (i = 0; i < *nbreaks; i++) {
+	if ((lequi[i] * stats.stdev + stats.mean) >= stats.min &&
+	    (lequi[i] * stats.stdev) + stats.mean <= stats.max) {
+	    classbreaks[j] = lequi[i] * stats.stdev + stats.mean;
+	    j++;
+	}
+    }
+
+    *nbreaks = j;
+
+    G_free(lequi);
+    return (1);
+}
+
+
+double class_discont(double *data, int count, int nbreaks, double *classbreaks)
+{
+    int *num, nbclass, maxclass = 0;
+    double *no, *zz, *nz, *xn, *co;
+    double *x;			//Vecteur des observations standardisées
+    int i, j, k;
+    double min = 0, max = 0, rangemax = 0;
+    int n = 0;
+    double rangemin = 0, xlim = 0;
+    double dmax = 0.0, d2 = 0.0, dd = 0.0, p = 0.0;
+    int nf = 0, nmax = 0;
+    double *abc;
+    int nd = 0;
+    double den = 0, d = 0;
+    int im = 0, ji = 0;
+    int tmp = 0;
+    int nff = 0, jj = 0, no1 = 0, no2 = 0;
+    double f = 0, xt1 = 0, xt2 = 0, chi2 = 0, xnj_1 = 0, xj_1 = 0;
+
+
+    /*get the number of values */
+    n = count;
+
+    nbclass = nbreaks + 1;
+
+    num = G_malloc((nbclass + 1) * sizeof(int));
+    no = G_malloc((nbclass + 1) * sizeof(double));
+    zz = G_malloc((nbclass + 1) * sizeof(double));
+    nz = G_malloc(3 * sizeof(double));
+    xn = G_malloc((n + 1) * sizeof(double));
+    co = G_malloc((nbclass + 1) * sizeof(double));
+
+    /* We copy the array of values to x, in order to be able to standardize it */
+    x = G_malloc((n + 1) * sizeof(double));
+    x[0] = n;
+    xn[0] = 0;
+
+    min = data[0];
+    max = data[count - 1];
+    for (i = 1; i <= n; i++)
+	x[i] = data[i - 1];
+
+    rangemax = max - min;
+    rangemin = rangemax;
+
+    for (i = 2; i <= n; i++) {
+	if (x[i] != x[i - 1] && x[i] - x[i - 1] < rangemin)
+	    rangemin = x[i] - x[i - 1];	/* rangemin = minimal distance */
+    }
+
+    /* STANDARDIZATION 
+     * and creation of the number vector (xn) */
+
+    for (i = 1; i <= n; i++) {
+	x[i] = (x[i] - min) / rangemax;
+	xn[i] = i / (double)n;
+    }
+    xlim = rangemin / rangemax;
+    rangemin = rangemin / 2.0;
+
+    /* Searching for the limits */
+    num[1] = n;
+    abc = G_malloc(3 * sizeof(double));
+
+    /*     Loop through possible solutions */
+    for (i = 1; i <= nbclass; i++) {
+	nmax = 0;
+	dmax = 0.0;
+	d2 = 0.0;
+	nf = 0;			/*End number */
+
+	/*           Loop through classes */
+	for (j = 1; j <= i; j++) {
+	    nd = nf;		/*Start number */
+	    nf = num[j];
+	    co[j] = 10e37;
+	    eqdrt(x, xn, nd, nf, abc);
+	    den = sqrt(pow(abc[1], 2) + 1.0);
+	    nd++;
+	    /*              Loop through observations */
+	    for (k = nd; k <= nf; k++) {
+		if (abc[2] == 0.0)
+		    d = fabs((-1.0 * abc[1] * x[k]) + xn[k] - abc[0]) / den;
+		else
+		    d = fabs(x[k] - abc[2]);
+		d2 += pow(d, 2);
+		if (x[k] - x[nd] < xlim)
+		    continue;
+		if (x[nf] - x[k] < xlim)
+		    continue;
+		if (d <= dmax)
+		    continue;
+		dmax = d;
+		nmax = k;
+	    }
+	    nd--;	//A VERIFIER!
+	    if (x[nf] != x[nd]) {
+		if (nd != 0)
+		    co[j] = (xn[nf] - xn[nd]) / (x[nf] - x[nd]);
+		else
+		    co[j] = (xn[nf]) / (x[nf]);	//A VERIFIER!
+	    }
+	}
+	if (i == 1)
+	    dd = d2;
+	p = d2 / dd;
+	for (j = 1; j <= i; j++) {
+	    no[j] = num[j];
+	    zz[j] = x[num[j]] * rangemax + min;
+	    if (j == i)
+		continue;
+	    if (co[j] > co[j + 1]) {
+		zz[j] = zz[j] + rangemin;
+		continue;
+	    }
+	    zz[j] = zz[j] - rangemin;
+	    no[j] = no[j] - 1;
+	}
+	im = i - 1;
+	if (im != 0.0) {
+	    for (j = 1; j <= im; j++) {
+		ji = i + 1 - j;
+		no[ji] -= no[ji - 1];
+	    }
+	}
+	if (nmax == 0) {
+	    break;
+	}
+	nff = i + 2;
+	tmp = 0;
+	for (j = 1; j <= i; j++) {
+	    jj = nff - j;
+	    if (num[jj - 1] < nmax) {
+		num[jj] = nmax;
+		tmp = 1;
+		break;
+	    }
+	    num[jj] = num[jj - 1];
+	}
+	if (tmp == 0) {
+	    num[1] = nmax;
+	    jj = 1;
+	}
+	if (jj == 1) {
+	    xnj_1 = 0;
+	    xj_1 = 0;
+	}
+	else {
+	    xnj_1 = xn[num[jj - 1]];
+	    xj_1 = x[num[jj - 1]];
+	}
+	no1 = (xn[num[jj]] - xnj_1) * n;
+	no2 = (xn[num[jj + 1]] - xn[num[jj]]) * n;
+	f = (xn[num[jj + 1]] - xnj_1) / (x[num[jj + 1]] - xj_1);
+	f *= n;
+	xt1 = (x[num[jj]] - xj_1) * f;
+	xt2 = (x[num[jj + 1]] - x[num[jj]]) * f;
+	if (xt2 == 0) {
+	    xt2 = rangemin / 2.0 / rangemax * f;
+	    xt1 -= xt2;
+	}
+	else if (xt1 * xt2 == 0) {
+	    xt1 = rangemin / 2.0 / rangemax * f;
+	    xt2 -= xt1;
+	}
+
+	/*if new class break not statistically significant (alpha > 0.05), give warning */
+	if (maxclass == 0) {
+	    chi2 = pow((double)((no1 - no2) - (xt1 - xt2)), 2) / (xt1 + xt2);
+	    if (chi2 < 3.84148) {
+
+		G_warning(_
+			  ("discontinuities algorithm: %i class breaks or more are not statistically significant at alpha=0.05"),
+			  i);
+		maxclass = 1;
+	    }
+	}
+    }
+
+    /*  Fill up classbreaks of i <=nbclass classes */
+    for (j = 0; j <= (i - 1); j++)
+	classbreaks[j] = zz[j + 1];
+
+    return (chi2);
+}
+
+int class_frequencies(double *data, int count, int nbreaks,
+		      double *classbreaks, int *frequencies)
+{
+    int i, j;
+    double min, max;
+
+    min = data[0];
+    max = data[count - 1];
+    /* count cases in all classes, except for last class */
+    i = 0;
+    for (j = 0; j < nbreaks; j++) {
+	while (data[i] <= classbreaks[j]) {
+	    frequencies[j]++;
+	    i++;
+	}
+    }
+
+    /*Now count cases in last class */
+    for (i = i; i < count; i++) {
+	frequencies[nbreaks]++;
+    }
+
+    return (1);
+}



More information about the grass-commit mailing list