algorithm used for i.cluster

Harini Nagendra harini at ces.iisc.ernet.in
Mon Dec 8 06:18:42 EST 1997


Hello,

This is the algorithm used for i.cluster -  sorry for the delay....
	
The classification algorithm uses input parameters set by the user on the
initial number of clusters, the minimum distance between clusters, and the
correspondence between iterations which is desired, and minimum size for
each cluster. It also asks if you want all pixels to be clustered, or
every 'x'th row and 'y'th column, the correspondence between iterations
desired, and the max. no. of iterations to be carried out. 
	
In the 1st pass, initial cluster means for each band are defined by giving
the first cluster a value equal to the band mean minus its standard
deviation, and the last cluster a value equal to the band mean plus its
standard deviation, with all other cluster means distributed equally
spaced in between these. Each pixel is then assigned to the class which it
is closest to, distance being measured as Euclidean distance. All clusters
less than the user-specified minimum distance are then merged. If a
cluster has less than the user-specified minimum no. of pixels, all those
pixels are again reassigned to the next nearest cluster.New cluster means
are calculated for each band as the average of intensity values in that
band for all pixels present in that cluster.  
	
In the 2nd pass, pixels are then again reassigned to clusters based on new
cluster means. The cluster means are then again recalculated.  This
processis repeated until the correspondence between iterations reaches a
user-specified level, or till the max. number of iterations specified is
over, whichever comes 1st. 
	
There is only one thing I haven't been able to decipher in this algorithm
- if, in the 1st pass, most of the clusters are empty, then a new method
of assigning initial cluster means is used. I wasn't able to figure out
what this "new"  method is...... if anyone can, please let me know. 
	
Bill Borwn had sent me the codes, I am including them with this mail
(sorry if it makes it too lonnnnnnnnnnnnnnnnnnnng!) - if anyone wants to
check it out.

Harini 
------------------------------------------------------------------------
#include "imagery.h"
I_cluster_assign (C,interrupted)
    struct Cluster *C;
    int *interrupted;
{
    int p, c;
    int class, band;
    double d,q;
    double dmin;

/*
fprintf (stderr,"I_cluster_assign(npoints=%d,nclasses=%d,nbands=%d)\n",
    C->npoints, C->nclasses, C->nbands);
*/

    for (p = 0; p < C->npoints; p++)
    {
	if (*interrupted) return;

	for (c = 0; c < C->nclasses; c++)
	{
	    d = 0.0;
	    for (band = 0; band < C->nbands; band++)
	    {
		q = (double) C->points[band][p];
		q -= C->mean[band][c];
		d += q*q;
	    }
	    if (c == 0 || d < dmin)
	    {
		class = c;
		dmin = d;
	    }
	}
	C->class[p] = class;
	C->count[class]++;
	for (band = 0; band < C->nbands; band++)
	    C->sum[band][class] += (double) C->points[band][p] ;
    }
}


/************* README  ***************/
To merge 2 signatures

n1     number of points in sig 1
n2     number of points in sig 2

mean1[nbands]  means per band for sig 1
mean2[nbands]  means per band for sig 2

var1[b1][b2]   covariance band 1 with band 2 for sig 1
var2[b1][b2]   covariance band 1 with band 2 for sig 2

the meger is

n = n1+n2
mean[b] = (mean1[b]*n1 + mean2[b]*n2)/n

sum1 = var1[b1][b2] * (n1-1) + n1 * mean[b1] * mean[b2];
sum2 = var2[b1][b2] * (n2-1) + n2 * mean[b1] * mean[b2];

var[b1][b2] = (sum1+sum2 - n*mean[b1]*mean[b2) / (n-1)


/******************* c_exec.c *******************/

/***************************************************************
 *
 * I_cluster_exec (C, maxclass, iterations,
 *               convergence, separation, min_class_size,
 *               checkpoint, interrupted)
 *
 *  maxclass       maximum number of classes
 *  iterations     maximum number of iterations
 *  convergence    percentage of points stable
 *  separation     minimum distance between class centroids
 *  checkpoint     routine to be called at various steps
 *  interrupted    boolean to check for interrupt
 *
 * returns:
 *   0 ok
 *  -1 out of memory
 *  -2 interrupted
 *   1 not enough data points
 *************************************************************/
#include "imagery.h"

I_cluster_exec (C, maxclass, iterations, convergence, separation,
	      min_class_size, checkpoint, interrupted)
    struct Cluster *C;
    double convergence;
    double separation;
    int (*checkpoint)();
    int *interrupted;
{
    int changes;

/* set interrupted to false */
    *interrupted = 0;

/* check for valid inputs */
    if (C->npoints < 2)
    {
	fprintf (stderr, "cluster: not enough data points (%d)\n",
	    C->npoints);
	return 1;
    }

/* check other parms */
    if (maxclass < 0)
	maxclass = 1;
    C->nclasses = maxclass;

    if (min_class_size <= 0)
	min_class_size = 17;
    if (min_class_size < 2)
	min_class_size = 2;

    if (iterations <= 0)
	iterations = 20;
    if (convergence <= 0.0)
	convergence = 98.0;
    if (separation < 0.0)
	separation = 0.5;


/* allocate memory */
    if (!I_cluster_exec_allocate(C))
	return -1;


/* generate class means */
    I_cluster_means (C);
    if (checkpoint)
    	(*checkpoint) (C,1);

/* now assign points to nearest class */
    I_cluster_assign (C,interrupted);
    if (*interrupted) return -2;
    I_cluster_sum2(C);
    if (checkpoint)
    	(*checkpoint) (C,2);

/* get rid of empty classes now */
    I_cluster_reclass (C,1);

    for (C->iteration = 1; ; C->iteration++)
    {
	if (*interrupted) return -2;

	changes = 0;

/* re-assign points to nearest class */

	changes = I_cluster_reassign (C,interrupted);
	if (*interrupted) return -2;

/* if too many points have changed class, re-assign points */
	C->percent_stable = (C->npoints-changes) * 100.0;
	C->percent_stable /= (double) C->npoints;

	if (checkpoint)
	    (*checkpoint) (C,3);

	if (C->iteration >= iterations)
	    break;

	if (C->percent_stable < convergence)
	    continue;

/* otherwise merge non-distinct classes */

	if (I_cluster_distinct (C,separation))
	    break;

	if (checkpoint)
	    (*checkpoint) (C,4);

	I_cluster_merge (C) ;
    }

/* get rid of small classes */
    I_cluster_reclass (C,min_class_size);
    I_cluster_sum2(C);

/* compute the resulting signatures */
    I_cluster_signatures (C);


    return 0;
}

/******************* c_means.c *******************/

#include "imagery.h"
I_cluster_means (C)
    struct Cluster *C;
{
    int band;
    int class;
    double m,v; /* m=mean, v=variance then std dev */
    double s;
    double sqrt();

/*
fprintf(stderr,"I_cluster_means(nbands=%d,nclasses=%d)\n",C->nbands, C->nclasses);
*/
    for (band = 0; band < C->nbands; band++)
    {
	s = (double) C->band_sum[band] ;
	m = s / (double) C->npoints;
	v = (double) C->band_sum2[band]  - s * m;
	v = sqrt (v / (double) (C->npoints - 1));
	for (class = 0; class < C->nclasses; class++)
	    C->mean[band][class] = m;
	if (C->nclasses > 1)
	    for (class = 0; class < C->nclasses; class++)
		C->mean[band][class] +=
		    ((2.0 * class) / (C->nclasses-1) - 1.0) * v;
    }
}

/******************* c_sum2.c *******************/

#include "imagery.h"

/* compute sum of squares for each class */
I_cluster_sum2 (C)
    struct Cluster *C;
{
    int p, band, class;
    double q;

/*
fprintf (stderr, "I_cluster_sum2(npoints=%d,nclasses=%d,nbands=%d)\n", C->npoints, C->nclasses, C->nbands);
*/
    for (class=0; class < C->nclasses; class++)
	for (band = 0; band < C->nbands; band++)
	    C->sum2[band][class] = 0;

    for (p = 0; p < C->npoints; p++)
    {
	class = C->class[p];
	if (class < 0)
	    continue;
	for (band = 0; band < C->nbands; band++)
	{
	    q = (double)C->points[band][p];
	    C->sum2[band][class] += q*q;
	}
    }
}

/******************* c_reclass.c *******************/

#include "imagery.h"

I_cluster_reclass(C,minsize)
    struct Cluster *C;
{
    int band, c, hole, move, p;

    for (c=0; c < C->nclasses; c++)
	C->reclass[c] = c;

/* find first `empty' class */
    for (hole = 0; hole < C->nclasses; hole++)
	if (C->count[hole] < minsize)
	    break;

/* if none, just return */
    if (hole >= C->nclasses)
	return;

    for (move = hole; move < C->nclasses; move++)
	if (C->count[move] >= minsize)
	{
	    C->reclass[move] = hole;
	    C->count[hole] = C->count[move];
	    for (band = 0; band < C->nbands; band++)
		C->sum[band][hole] = C->sum[band][move];
	    hole++;
	}
	else
	    C->reclass[move] = -1;	/* eliminate this class */

    for (p = 0; p < C->npoints; p++)
	C->class[p] = C->reclass[C->class[p]];
    C->nclasses = hole;
}

/******************* c_reassign.c *******************/

#include "imagery.h"
I_cluster_reassign (C,interrupted)
    struct Cluster *C;
    int *interrupted;
{
    double min,d,z;
    int q;
    int c,np;
    int old;
    int p, band, class;
    int changes;
    int first;

    changes = 0;
    for (c = 0; c < C->nclasses; c++)
    {
	C->countdiff[c] = 0;
	for (band = 0; band < C->nbands; band++)
	    C->sumdiff[band][c] = 0;
    }

    for (p = 0; p < C->npoints; p++)
    {
	if (*interrupted) return 0;
	if (C->class[p] < 0)	/* point to be ignored */
	    continue;

/* find minimum distance to center of all classes */
	first = 1;
	for (c = 0; c < C->nclasses; c++)
	{
	    d = 0;
	    np = C->count[c];
	    if (np == 0) continue;
	    for (band = 0; band < C->nbands; band++)
	    {
		z =  C->points[band][p] * np - C->sum[band][c] ;
		d += z*z;
	    }
	    d /= (np*np);

	    if (first || (d < min))
	    {
		class = c;
		min = d;
		first = 0;
	    }
	}

	if (C->class[p] != class)
	{
	    old = C->class[p];
	    C->class[p] = class;
	    changes++;

	    C->countdiff[class]++;
	    C->countdiff[old]--;

	    for (band = 0; band < C->nbands; band++)
	    {
		q = (int) C->points[band][p];
		C->sumdiff[band][class] += q;
		C->sumdiff[band][old] -= q;
	    }
	}
    }

    if (changes)
    {
	for (c=0; c < C->nclasses; c++)
	{	
	    C->count[c] += C->countdiff[c];
	    for (band = 0; band < C->nbands; band++)
		C->sum[band][c] += C->sumdiff[band][c];
	}
    }

    return changes ;
}

/******************* c_distinct.c *******************/

#include "imagery.h"

I_cluster_distinct (C,separation)
    struct Cluster *C;
    double separation;
{
    int class1, class2;
    int distinct;
    double dmin;
    double dsep;
    double I_cluster_separation();

/* compute sum of squares for each class */
    I_cluster_sum2 (C);

/* find closest classes */
    distinct = 1;
    dmin = separation;
    for (class1 = 0; class1 < (C->nclasses-1); class1++)
    {
	if (C->count[class1] < 2) continue;
	for (class2 = class1+1; class2 < C->nclasses; class2++)
	{
	    if (C->count[class2] < 2) continue;
	    dsep = I_cluster_separation (C, class1,class2);

	    if (dsep >= 0.0 && dsep < dmin)
	    {
		distinct = 0;
		C->merge1 = class1;
		C->merge2 = class2;
		dmin = dsep ;
	    }
	}
    }

    return distinct;
}

/******************* c_merge.c *******************/

#include "imagery.h"


I_cluster_merge (C)
    struct Cluster *C;
{
    int band, p;
    int c1, c2;

    c1 = C->merge1;
    c2 = C->merge2;

    for (p = 0; p < C->npoints; p++)
	if (C->class[p] == c2)
	    C->class[p] = c1;
    C->count[c1] += C->count[c2];
    C->count[c2] = 0;
    for (band = 0; band < C->nbands; band++)
    {
	C->sum[band][c1] += C->sum[band][c2];
	C->sum[band][c2] = 0;
    }
}

/******************* c_sig.c *******************/

#include "imagery.h"
I_cluster_signatures (C)
    struct Cluster *C;
{
    int c, p, band1, band2;
    int n;
    double m1,m2;
    double p1,p2;
    double dn;

/*
fprintf (stderr, "c_sig: 1\n");
fprintf (stderr, "  nclasses %d\n", C->nclasses);
fprintf (stderr, "  npoints  %d\n", C->npoints );
fprintf (stderr, "  nbands   %d\n", C->nbands  );
*/
    for (n = 0; n < C->nclasses; n++)
    {
	I_new_signature (&C->S);
    }

    for (p = 0; p < C->npoints; p++)
    {
	c = C->class[p];
	if (c < 0)
	    continue;
/*
if (c >= C->nclasses)
    fprintf (stderr, " class[%d]=%d ** illegal **\n", p, c);
*/
	dn = n = C->count[c];
	if (n < 2) continue ;
	for (band1 = 0; band1 < C->nbands; band1++)
	{
	    m1 = C->sum[band1][c] / dn ;
	    p1 = C->points[band1][p];
	    for (band2 = 0; band2 <= band1; band2++)
	    {
		m2 = C->sum[band2][c] / dn ;
		p2 = C->points[band2][p];
		C->S.sig[c].var[band1][band2] += (p1 - m1) * (p2 - m2);
	    }
	}
    }

    for (c = 0; c < C->nclasses; c++)
    {
	dn = n = C->S.sig[c].npoints = C->count[c];
	if (n == 0) dn = 1.0;
	for (band1 = 0; band1 < C->nbands; band1++)
	    C->S.sig[c].mean[band1] = (double) C->sum[band1][c] / dn;
	dn = n = C->count[c] - 1;
	if (n < 1)
	    continue;
	for (band1 = 0; band1 < C->nbands; band1++)
	    for (band2 = 0; band2 <= band1; band2++)
		C->S.sig[c].var[band1][band2] /= dn ;
	C->S.sig[c].status = 1;
    }
}





More information about the grass-user mailing list