[GRASS-SVN] r39602 - grass/trunk/lib/segment

svn_grass at osgeo.org svn_grass at osgeo.org
Wed Oct 21 11:49:13 EDT 2009


Author: mmetz
Date: 2009-10-21 11:49:13 -0400 (Wed, 21 Oct 2009)
New Revision: 39602

Added:
   grass/trunk/lib/segment/rbtree.c
   grass/trunk/lib/segment/rbtree.h
Modified:
   grass/trunk/lib/segment/address.c
   grass/trunk/lib/segment/flush.c
   grass/trunk/lib/segment/format.c
   grass/trunk/lib/segment/get.c
   grass/trunk/lib/segment/get_row.c
   grass/trunk/lib/segment/init.c
   grass/trunk/lib/segment/pagein.c
   grass/trunk/lib/segment/pageout.c
   grass/trunk/lib/segment/put.c
   grass/trunk/lib/segment/put_row.c
   grass/trunk/lib/segment/release.c
   grass/trunk/lib/segment/seek.c
   grass/trunk/lib/segment/segmentlib.dox
   grass/trunk/lib/segment/setup.c
Log:
faster segment library

Modified: grass/trunk/lib/segment/address.c
===================================================================
--- grass/trunk/lib/segment/address.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/address.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,12 +9,56 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <grass/segment.h>
 
+int segment_address_fast(const SEGMENT * SEG, int row, int col, int *n, int *index)
+{
+    if (row) {
+        int seg_r = row >> SEG->srowbits;
+        int seg_c = col >> SEG->scolbits;
+     
+        *n = seg_r * SEG->spr + seg_c;
+        *index = ((row - (seg_r << SEG->srowbits)) << SEG->scolbits) + col - (seg_c << SEG->scolbits);
+    }
+    /* for simple arrays */
+    else {
+        *n = col >> SEG->scolbits;
+        *index = col - ((*n) << SEG->scolbits);
+    }
+    if (SEG->slow_seek == 0)
+        *index = *index << SEG->lenbits;
+    else
+        *index *= SEG->len;
 
+    return 0;
+}
+
+int segment_address_slow(const SEGMENT * SEG, int row, int col, int *n, int *index)
+{
+    if (row) {
+        int seg_r = row / SEG->srows;
+        int seg_c = col / SEG->scols;
+
+        *n = seg_r * SEG->spr + seg_c;
+        *index = (row - seg_r * SEG->srows) * SEG->scols + col - seg_c * SEG->scols;
+    }
+    /* for simple arrays */
+    else {
+        *n = col / SEG->scols;
+        *index = col - *n * SEG->scols;
+    }
+    *index *= SEG->len;
+
+    return 0;
+}
+
+static int (*segment_adrs[2]) (const SEGMENT *, int, int, int *, int *) = {
+    segment_address_fast, segment_address_slow	/* indexed by SEG->slow_adrs */
+};
+
 /**
  * \fn int segment_address (SEGMENT *SEG, int row, int col, int *n, int *index)
  *
@@ -30,8 +74,13 @@
 
 int segment_address(const SEGMENT * SEG, int row, int col, int *n, int *index)
 {
+    /* old code
     *n = row / SEG->srows * SEG->spr + col / SEG->scols;
     *index = (row % SEG->srows * SEG->scols + col % SEG->scols) * SEG->len;
+    */
 
-    return 0;
+    /* this function is called at least once every time data are accessed in SEG
+     * avoid very slow modulus and divisions, modulus was the main time killer */
+
+    return (*segment_adrs[SEG->slow_adrs])(SEG, row, col, n, index);
 }

Modified: grass/trunk/lib/segment/flush.c
===================================================================
--- grass/trunk/lib/segment/flush.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/flush.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <grass/segment.h>

Modified: grass/trunk/lib/segment/format.c
===================================================================
--- grass/trunk/lib/segment/format.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/format.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <grass/config.h>

Modified: grass/trunk/lib/segment/get.c
===================================================================
--- grass/trunk/lib/segment/get.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/get.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <string.h>

Modified: grass/trunk/lib/segment/get_row.c
===================================================================
--- grass/trunk/lib/segment/get_row.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/get_row.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <stdio.h>

Modified: grass/trunk/lib/segment/init.c
===================================================================
--- grass/trunk/lib/segment/init.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/init.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -6,9 +6,10 @@
  *               Bernhard Reiter <bernhard intevation.de>, 
  *               Brad Douglas <rez touchofmadness.com>, 
  *               Glynn Clements <glynn gclements.plus.com>, 
- *               Markus Neteler <neteler itc.it>
+ *               Markus Neteler <neteler itc.it>,
+ *               Markus Metz <markus.metz.giswork googlemail.com>
  * PURPOSE:      Segment initialization routines
- * COPYRIGHT:    (C) 2000-2006 by the GRASS Development Team
+ * COPYRIGHT:    (C) 2000-2009 by the GRASS Development Team
  *
  *               This program is free software under the GNU General Public
  *               License (>=v2). Read the file COPYING that comes with GRASS

Modified: grass/trunk/lib/segment/pagein.c
===================================================================
--- grass/trunk/lib/segment/pagein.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/pagein.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <stdio.h>
@@ -17,11 +17,9 @@
 #include <string.h>
 #include <errno.h>
 #include <grass/segment.h>
+#include "rbtree.h"
 
 
-static int segment_select(SEGMENT *, int);
-
-
 /**
  * \fn int segment_pagein (SEGMENT *SEG, int n)
  *
@@ -38,37 +36,60 @@
 
 int segment_pagein(SEGMENT * SEG, int n)
 {
-    int age;
     int cur;
-    int i;
     int read_result;
+    SEGID *seg_found, seg_search;
 
     /* is n the current segment? */
     if (n == SEG->scb[SEG->cur].n)
 	return SEG->cur;
-
+	
     /* search the in memory segments */
-    for (i = 0; i < SEG->nseg; i++)
-	if (n == SEG->scb[i].n)
-	    return segment_select(SEG, i);
+    seg_search.i = 0;
+    seg_search.n = n;
+    seg_found = rbtree_find(SEG->loaded, &seg_search);
+    if (seg_found) {
+	cur = seg_found->i;
 
+	if (SEG->scb[cur].age != SEG->youngest) {
+	    /* splice out */
+	    SEG->scb[cur].age->younger->older = SEG->scb[cur].age->older;
+	    SEG->scb[cur].age->older->younger = SEG->scb[cur].age->younger;
+	    /* splice in */
+	    SEG->scb[cur].age->younger = SEG->youngest->younger;
+	    SEG->scb[cur].age->older = SEG->youngest;
+	    SEG->scb[cur].age->older->younger = SEG->scb[cur].age;
+	    SEG->scb[cur].age->younger->older = SEG->scb[cur].age;
+	    /* make it youngest */
+	    SEG->youngest = SEG->scb[cur].age;
+	}
+	
+	return SEG->cur = cur;
+    }
+    
     /* find a slot to use to hold segment */
-    age = 0;
-    cur = 0;
-    for (i = 0; i < SEG->nseg; i++)
-	if (SEG->scb[i].n < 0) {	/* free slot */
-	    cur = i;
-	    break;
-	}
-	else if (age < SEG->scb[i].age) {	/* find oldest segment */
-	    cur = i;
-	    age = SEG->scb[i].age;
-	}
+    if (SEG->nfreeslots) {  /* any free slots left ? */
+	cur = SEG->freeslot[--SEG->nfreeslots];
+    }
+    else {	/* find oldest segment */
+	SEG->oldest = SEG->oldest->younger;
+	cur = SEG->oldest->cur;
+	SEG->oldest->cur = -1;
+	SEG->scb[cur].age = NULL;
+    }
 
     /* if slot is used, write it out, if dirty */
-    if (SEG->scb[cur].n >= 0 && SEG->scb[cur].dirty)
+    if (SEG->scb[cur].n >= 0 && SEG->scb[cur].dirty) {
 	if (segment_pageout(SEG, cur) < 0)
 	    return -1;
+    }
+	
+    if (SEG->scb[cur].n >= 0) {
+	seg_search.n = SEG->scb[cur].n;
+	if (rbtree_remove(SEG->loaded, &seg_search) == 0)
+	    G_fatal_error("could not remove segment");
+	seg_search.n = n;
+    }
 
     /* read in the segment */
     SEG->scb[cur].n = n;
@@ -92,17 +113,18 @@
 	return -1;
     }
 
-    return segment_select(SEG, cur);
-}
+    if (cur < 0 || n < 0)
+	G_fatal_error("segment not loaded");
 
+    /* remember loaded segment */
+    seg_search.i = cur;
+    if (rbtree_insert(SEG->loaded, &seg_search) == 0)
+	G_fatal_error("could not insert segment");
 
-static int segment_select(SEGMENT * SEG, int n)
-{
-    int i;
-
-    SEG->scb[n].age = 0;
-    for (i = 0; i < SEG->nseg; i++)
-	SEG->scb[i].age++;
-
-    return SEG->cur = n;
+    /* make it youngest segment */
+    SEG->youngest = SEG->youngest->younger;
+    SEG->scb[cur].age = SEG->youngest;
+    SEG->youngest->cur = cur;
+    
+    return SEG->cur = cur;
 }

Modified: grass/trunk/lib/segment/pageout.c
===================================================================
--- grass/trunk/lib/segment/pageout.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/pageout.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <stdio.h>

Modified: grass/trunk/lib/segment/put.c
===================================================================
--- grass/trunk/lib/segment/put.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/put.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <string.h>
@@ -46,8 +46,10 @@
     int index, n, i;
 
     segment_address(SEG, row, col, &n, &index);
-    if ((i = segment_pagein(SEG, n)) < 0)
-	return -1;
+    if ((i = segment_pagein(SEG, n)) < 0) {
+        G_warning("segment lib: put: pagein failed");
+        return -1;
+    }
 
     SEG->scb[i].dirty = 1;
 

Modified: grass/trunk/lib/segment/put_row.c
===================================================================
--- grass/trunk/lib/segment/put_row.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/put_row.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <stdio.h>

Added: grass/trunk/lib/segment/rbtree.c
===================================================================
--- grass/trunk/lib/segment/rbtree.c	                        (rev 0)
+++ grass/trunk/lib/segment/rbtree.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -0,0 +1,536 @@
+/*!
+ * \file rbtree.c
+ *
+ * \brief binary search tree 
+ *
+ * Generic balanced binary search tree (Red Black Tree) implementation
+ *
+ * (C) 2009 by the GRASS Development Team
+ *
+ * This program is free software under the GNU General Public License
+ * (>=v2).  Read the file COPYING that comes with GRASS for details.
+ *
+ * \author Original author Julienne Walker 2003, 2008
+ *         GRASS implementation Markus Metz, 2009
+ */
+
+/* balanced binary search tree implementation
+ * 
+ * this one is a Red Black Tree, the bare version, no parent pointers, no threads
+ * The core code comes from Julienne Walker's tutorials on binary search trees
+ * original license: public domain
+ * http://eternallyconfuzzled.com/tuts/datastructures/jsw_tut_rbtree.aspx
+ * some ideas come from libavl (GPL >= 2)
+ * I could have used some off-the-shelf solution, but that's boring
+ *
+ * Red Black Trees are used to maintain a data structure with
+ * search, insertion and deletion in O(log N) time
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <grass/gis.h>
+#include <grass/glocale.h>
+#include "rbtree.h"
+
+/* internal functions: static keeps generic names such as is_red() file-local */
+static void rbtree_destroy2(struct RB_NODE *);
+static struct RB_NODE *rbtree_single(struct RB_NODE *, int);
+static struct RB_NODE *rbtree_double(struct RB_NODE *, int);
+static void *rbtree_first(struct RB_TRAV *);
+static void *rbtree_next(struct RB_TRAV *);
+static struct RB_NODE *rbtree_make_node(size_t, void *);
+static int is_red(struct RB_NODE *);
+
+
+/* create new tree and initialize
+ * returns pointer to new tree, NULL for memory allocation error
+ */
+struct RB_TREE *rbtree_create(rb_compare_fn *compare, size_t rb_datasize)
+{
+    struct RB_TREE *tree = G_malloc(sizeof(*tree));
+
+    if (tree == NULL) {
+	G_warning("RB tree: Out of memory!");
+	return NULL;
+    }
+
+    assert(compare);
+
+    tree->datasize = rb_datasize;
+    tree->rb_compare = compare;
+    tree->count = 0;
+    tree->root = NULL;
+
+    return tree;
+} 
+
+/* add an item to a tree
+ * non-recursive top-down insertion
+ * duplicates are silently ignored: tree contents and count stay unchanged
+ * returns 1 on success (also for an ignored duplicate), 0 on failure
+ */
+int rbtree_insert(struct RB_TREE *tree, void *data)
+{
+    assert(tree && data);
+    
+    if (tree->root == NULL) {
+	/* create a new root node for tree */
+	tree->root = rbtree_make_node(tree->datasize, data);
+	if (tree->root == NULL)
+	    return 0;
+	tree->count++;
+    }
+    else {
+	struct RB_NODE head = {0}; /* False tree root */
+
+	struct RB_NODE *g, *t;     /* Grandparent & parent */
+	struct RB_NODE *p, *q;     /* Iterator & parent */
+	int dir = 0, last = 0;
+
+	/* Set up helpers */
+	t = &head;
+	g = p = NULL;
+	q = t->link[1] = tree->root;
+
+	/* Search down the tree */
+	for ( ; ; ) {
+	    if (q == NULL) {
+		/* Insert new node at the bottom */
+		p->link[dir] = q = rbtree_make_node(tree->datasize, data);
+		if (q == NULL)
+		    return 0;
+		tree->count++;	/* count real insertions only, not duplicates */
+	    }
+	    else if (is_red(q->link[0]) && is_red(q->link[1])) {
+		/* Color flip */
+		q->red = 1;
+		q->link[0]->red = 0;
+		q->link[1]->red = 0;
+	    }
+
+	    /* Fix red violation */
+	    if (is_red(q) && is_red(p)) {
+		int dir2 = t->link[1] == g;
+
+		if (q == p->link[last])
+		    t->link[dir2] = rbtree_single(g, !last);
+		else
+		    t->link[dir2] = rbtree_double(g, !last);
+	    }
+
+	    last = dir;
+	    dir = tree->rb_compare(q->data, data);
+
+	    /* Stop if found. This check also disallows duplicates in the tree */
+	    if (dir == 0)
+		break;
+
+	    dir = dir < 0;
+
+	    /* Move the helpers down */
+	    if (g != NULL)
+		t = g;
+
+	    g = p, p = q;
+	    q = q->link[dir];
+	}
+
+	/* Update root */
+	tree->root = head.link[1];
+    }
+
+    /* Make root black */
+    tree->root->red = 0;
+
+    return 1;
+}
+
+/* remove an item from a tree that matches given data
+ * non-recursive top-down removal
+ * returns 1 on successful removal
+ * returns 0 if data item was not found
+ */
+int rbtree_remove(struct RB_TREE *tree, const void *data)
+{
+    struct RB_NODE head = {0}; /* False tree root */
+    struct RB_NODE *q, *p, *g; /* Helpers */
+    struct RB_NODE *f = NULL;  /* Found item */
+    int dir = 1, removed = 0;
+
+    assert(tree && data);
+
+    if (tree->root == NULL) {
+	return 0; /* empty tree, nothing to remove */
+    }
+
+    /* Set up helpers */
+    q = &head;
+    g = p = NULL;
+    q->link[1] = tree->root;
+
+    /* Search and push a red down */
+    while (q->link[dir] != NULL) {
+	int last = dir;
+
+	/* Update helpers */
+	g = p, p = q;
+	q = q->link[dir];
+	dir = tree->rb_compare(q->data, data);
+
+	/* Save found node */
+	if (dir == 0)
+	    f = q;
+
+	dir = dir < 0;
+
+	/* Push the red node down */
+	if (!is_red(q) && !is_red(q->link[dir])) {
+	    if (is_red(q->link[!dir]))
+		p = p->link[last] = rbtree_single(q, dir);
+	    else if (!is_red(q->link[!dir])) {
+		struct RB_NODE *s = p->link[!last];
+
+		if (s != NULL) {
+		    if (!is_red(s->link[!last]) &&
+		        !is_red(s->link[last])) {
+			/* Color flip */
+			p->red = 0;
+			s->red = 1;
+			q->red = 1;
+		    }
+		    else {
+			int dir2 = g->link[1] == p;
+
+			if (is_red(s->link[last]))
+			    g->link[dir2] = rbtree_double(p, last);
+			else if (is_red(s->link[!last]))
+			    g->link[dir2] = rbtree_single(p, last);
+
+			/* Ensure correct coloring */
+			q->red = g->link[dir2]->red = 1;
+			g->link[dir2]->link[0]->red = 0;
+			g->link[dir2]->link[1]->red = 0;
+		    }
+		}
+	    }
+	}
+    }
+
+    /* Replace and remove if found */
+    if (f != NULL) {
+	G_free(f->data);
+	f->data = q->data;
+	p->link[p->link[1] == q] = q->link[q->link[0] == NULL];
+	G_free(q);
+	tree->count--;
+	removed = 1;
+    }
+    else
+	G_debug(2, "RB tree: data not found in search tree");
+
+    /* Update root and make it black */
+    tree->root = head.link[1];
+    if ( tree->root != NULL)
+	tree->root->red = 0;
+
+    return removed;
+}
+
+/* find data item in tree
+ * returns pointer to the item stored in the tree if found, else NULL
+ */
+void *rbtree_find(struct RB_TREE *tree, const void *data)
+{
+    struct RB_NODE *curr_node;
+    int cmp = 0;
+
+    assert(tree && data);	/* must run before tree is dereferenced */
+    curr_node = tree->root;
+    while (curr_node != NULL) {
+	cmp = tree->rb_compare(curr_node->data, data);
+	if (cmp == 0)
+	    return curr_node->data;   /* found */
+	else {
+	    curr_node = curr_node->link[cmp < 0];
+	}
+    }
+    return NULL;
+}
+
+/* initialize tree traversal
+ * (re-)sets trav structure
+ * returns 0
+ */
+int rbtree_init_trav(struct RB_TRAV *trav, struct RB_TREE *tree)
+{
+    assert(trav && tree);
+
+    trav->tree = tree;
+    trav->curr_node = tree->root;
+    trav->first = 1;
+    trav->top = 0;
+
+    return 0;
+}
+
+/* traverse the tree in ascending order
+ * useful to get all items in the tree non-recursively
+ * struct RB_TRAV *trav needs to be initialized first
+ * returns pointer to data, NULL when finished
+ */
+void *rbtree_traverse(struct RB_TRAV *trav)
+{
+    assert(trav);
+    
+    if (trav->curr_node == NULL) {
+	if (trav->first)
+	    G_debug(1, "RB tree: empty tree");
+	else
+	    G_debug(1, "RB tree: finished traversing");
+
+	return NULL;
+    }
+	
+    if (!trav->first)
+	return rbtree_next(trav);
+    else {
+	trav->first = 0;
+	return rbtree_first(trav);
+    }
+}
+
+/* find start point to traverse the tree in ascending order
+ * useful to get a selection of items in the tree
+ * magnitudes faster than traversing the whole tree
+ * may return first item that's smaller or first item that's larger
+ * struct RB_TRAV *trav needs to be initialized first
+ * returns pointer to data, NULL when finished
+ */
+void *rbtree_traverse_start(struct RB_TRAV *trav, const void *data)
+{
+    int dir = 0;
+
+    assert(trav && data);
+
+    if (trav->curr_node == NULL) {
+	if (trav->first)
+	    G_warning("RB tree: empty tree");
+	else
+	    G_warning("RB tree: finished traversing");
+
+	return NULL;
+    }
+	
+    if (!trav->first)
+	return rbtree_next(trav);
+
+    /* else first time, get start node */
+
+    trav->first = 0;
+    trav->top = 0;
+
+    while (trav->curr_node != NULL) {
+	dir = trav->tree->rb_compare(trav->curr_node->data, data);
+	/* exact match, great! */
+	if (dir == 0)
+	    return trav->curr_node->data;
+	else {
+	    dir = dir < 0;
+	    /* end of branch, also reached if
+	     * smallest item is larger than search template or
+	     * largest item is smaller than search template */
+	    if (trav->curr_node->link[dir] == NULL)
+		return trav->curr_node->data;
+		
+	    trav->up[trav->top++] = trav->curr_node;
+	    trav->curr_node = trav->curr_node->link[dir];
+	}
+    }
+
+    return NULL; /* should not happen */
+}
+
+/* two functions needed to fully traverse the tree: initialize and continue
+ * useful to get all items in the tree non-recursively
+ * this one here uses a stack
+ * parent pointers or threads would also be possible
+ * but these would need to be added to RB_NODE
+ * -> more memory needed for standard operations
+ */
+
+/* start traversing the tree
+ * returns pointer to smallest data item
+ */
+void *rbtree_first(struct RB_TRAV *trav)
+{
+    /* get smallest item */
+    while (trav->curr_node->link[0] != NULL) {
+	trav->up[trav->top++] = trav->curr_node;
+	trav->curr_node = trav->curr_node->link[0];
+    }
+
+    return trav->curr_node->data; /* return smallest item */
+}
+
+/* continue traversing the tree in ascending order
+ * returns pointer to data item, NULL when finished
+ */
+void *rbtree_next(struct RB_TRAV *trav)
+{
+    if (trav->curr_node->link[1] != NULL) {
+	/* something on the right side: larger item */
+	trav->up[trav->top++] = trav->curr_node;
+	trav->curr_node = trav->curr_node->link[1];
+
+	/* go down, find smallest item in this branch */
+	while (trav->curr_node->link[0] != NULL) {
+	    trav->up[trav->top++] = trav->curr_node;
+	    trav->curr_node = trav->curr_node->link[0];
+	}
+    }
+    else {
+	/* at smallest item in this branch, go back up */
+	struct RB_NODE *last;
+	do {
+	    if (trav->top == 0) {
+		trav->curr_node = NULL;
+		break;
+	    }
+	    last = trav->curr_node;
+	    trav->curr_node = trav->up[--trav->top];
+	} while (last == trav->curr_node->link[1]);
+    }
+
+    if (trav->curr_node != NULL) {
+	return trav->curr_node->data;
+    }
+    else	
+	return NULL; /* finished traversing */
+}
+
+/* destroy the tree */
+void rbtree_destroy(struct RB_TREE *tree) {
+    rbtree_destroy2(tree->root);
+    G_free(tree);
+}
+
+void rbtree_destroy2(struct RB_NODE *root)
+{
+    if (root != NULL) {
+	rbtree_destroy2(root->link[0]);
+	rbtree_destroy2(root->link[1]);
+	G_free(root->data);
+	G_free(root);
+    }
+}
+
+/* used for debugging: check for errors in tree structure */
+int rbtree_debug(struct RB_TREE *tree, struct RB_NODE *root)
+{
+    int lh, rh;
+ 
+    if (root == NULL)
+	return 1;
+    else {
+	struct RB_NODE *ln = root->link[0];
+	struct RB_NODE *rn = root->link[1];
+	int lcmp = 0, rcmp = 0;
+
+	/* Consecutive red links */
+	if (is_red(root)) {
+	    if (is_red(ln) || is_red(rn)) {
+		G_warning("Red Black Tree debugging: Red violation");
+		return 0;
+	    }
+	}
+
+	lh = rbtree_debug(tree, ln);
+	rh = rbtree_debug(tree, rn);
+
+	if (ln) {
+	    lcmp = tree->rb_compare(ln->data, root->data);
+	}
+	
+	if (rn) {
+	    rcmp = tree->rb_compare(rn->data, root->data);
+	}
+
+	/* Invalid binary search tree:
+	 * left node >= parent or right node <= parent */
+	if ((ln != NULL && lcmp > -1)
+	 || (rn != NULL && rcmp < 1)) {
+	    G_warning("Red Black Tree debugging: Binary tree violation" );
+	    return 0;
+	}
+
+	/* Black height mismatch */
+	if (lh != 0 && rh != 0 && lh != rh) {
+	    G_warning("Red Black Tree debugging: Black violation");
+	    return 0;
+	}
+
+	/* Only count black links */
+	if (lh != 0 && rh != 0)
+	    return is_red(root) ? lh : lh + 1;
+	else
+	    return 0;
+    }
+}
+
+/*******************************************************
+ *                                                     *
+ *  internal functions for Red Black Tree maintenance  *
+ *                                                     *
+ *******************************************************/
+
+/* allocate a new red node holding a copy of data (not yet linked into a tree) */
+struct RB_NODE *rbtree_make_node(size_t datasize, void *data)
+{
+    struct RB_NODE *new_node = G_malloc(sizeof(*new_node));
+
+    if (new_node == NULL)
+	G_fatal_error("RB Search Tree: Out of memory!");
+
+    new_node->data = G_malloc(datasize);
+    if (new_node->data == NULL)
+	G_fatal_error("RB Search Tree: Out of memory!");
+	
+    memcpy(new_node->data, data, datasize);
+    new_node->red = 1;            /* 1 is red, 0 is black */
+    new_node->link[0] = NULL;
+    new_node->link[1] = NULL;
+
+    return new_node;
+}
+
+/* is this node red? a NULL node counts as black */
+int is_red(struct RB_NODE *root)
+{
+    if (root)
+	return root->red == 1;
+
+    return 0;
+}
+
+/* single rotation */
+struct RB_NODE *rbtree_single(struct RB_NODE *root, int dir)
+{
+    struct RB_NODE *newroot = root->link[!dir];
+
+    root->link[!dir] = newroot->link[dir];
+    newroot->link[dir] = root;
+
+    root->red = 1;
+    newroot->red = 0;
+
+    return newroot;
+}
+ 
+/* double rotation */
+struct RB_NODE *rbtree_double(struct RB_NODE *root, int dir)
+{
+    root->link[!dir] = rbtree_single(root->link[!dir], !dir);
+    return rbtree_single(root, dir);
+}

Added: grass/trunk/lib/segment/rbtree.h
===================================================================
--- grass/trunk/lib/segment/rbtree.h	                        (rev 0)
+++ grass/trunk/lib/segment/rbtree.h	2009-10-21 15:49:13 UTC (rev 39602)
@@ -0,0 +1,112 @@
+/*************************************************************
+ *                          USAGE                            *
+ *************************************************************
+ *
+ * NOTE: duplicates are not supported
+ *
+ * custom compare function
+ * extern int my_compare_fn(const void *, const void *);
+ * int my_compare_fn(const void *a, const void *b) {
+ *   if (((mydatastruct *) a)->key < ((mydatastruct *) b)->key)
+ *     return -1;
+ *   else if (((mydatastruct *) a)->key > ((mydatastruct *) b)->key)
+ *     return 1;
+ *   else
+ *     return 0;
+ * }
+ * 
+ * create and initialize tree:
+ * struct RB_TREE *mytree = rbtree_create(my_compare_fn, item_size);
+ *
+ * insert items to tree:
+ * struct mydatastruct data = <some data>;
+ * if (rbtree_insert(mytree, &data) == 0)
+ * 	 G_warning("could not insert data");
+ *
+ * find item in tree:
+ * struct mydatastruct data = <some data>;
+ * if (rbtree_find(mytree, &data) == 0)
+ * 	 G_message("data not found");
+ *
+ * delete item from tree:
+ * struct mydatastruct data = <some data>;
+ * if (rbtree_remove(mytree, &data) == 0)
+ * 	  G_warning("could not find data in tree");
+ *
+ * traverse tree (get all items in tree in ascending order):
+ * struct RB_TRAV trav;
+ * rbtree_init_trav(&trav, tree);
+ * while ((data = rbtree_traverse(&trav)) != NULL) {
+ *   if (my_compare_fn(data, threshold_data) == 0) break;
+ * 	   <do something with data>;
+ *  }
+ *
+ * get a selection of items: all data > data1 and < data2
+ * start in tree where data is last smaller or first larger compared to data1
+ * struct RB_TRAV trav;
+ * rbtree_init_trav(&trav, tree);
+ * data = rbtree_traverse_start(&trav, &data1);
+ * 	 <do something with data>;
+ * while ((data = rbtree_traverse(&trav)) != NULL) {
+ *	 if (data > data2) break;
+ *   <do something with data>;
+ * }
+ *
+ * destroy tree:
+ * rbtree_destroy(mytree);
+ *
+ * debug the whole tree with
+ * rbtree_debug(mytree, mytree->root);
+ * 
+ *************************************************************/
+
+#include <stddef.h>
+
+/* maximum RB Tree height */
+#define RBTREE_MAX_HEIGHT 64        /* should be more than enough */
+
+/* routine to compare data items
+ * return -1 if rb_a < rb_b
+ * return  0 if rb_a == rb_b
+ * return  1 if rb_a > rb_b
+ */
+typedef int rb_compare_fn(const void *rb_a, const void *rb_b);
+
+struct RB_NODE
+{
+    unsigned char red;              /* 0 = black, 1 = red */
+    void *data;                     /* any kind of data */
+    struct RB_NODE *link[2];        /* link to children: link[0] for smaller, link[1] for larger */
+};
+ 
+struct RB_TREE
+{
+    struct RB_NODE *root;           /* root node */
+    size_t datasize;                /* item size */
+    size_t count;                   /* number of items in tree. */
+    rb_compare_fn *rb_compare;      /* function to compare data */
+};
+
+struct RB_TRAV
+{
+    struct RB_TREE *tree;           /* tree being traversed */
+    struct RB_NODE *curr_node;      /* current node */
+    struct RB_NODE *up[RBTREE_MAX_HEIGHT];  /* stack of parent nodes */
+    int top;                        /* index for stack */
+    int first;                      /* little helper flag */
+};
+
+/* tree functions */
+struct RB_TREE *rbtree_create(rb_compare_fn *, size_t);
+void rbtree_destroy(struct RB_TREE *);
+int rbtree_insert(struct RB_TREE *, void *);
+int rbtree_remove(struct RB_TREE *, const void *);
+void *rbtree_find(struct RB_TREE *, const void *);
+
+/* tree traversal functions */
+int rbtree_init_trav(struct RB_TRAV *, struct RB_TREE *);
+void* rbtree_traverse(struct RB_TRAV *);
+void *rbtree_traverse_start(struct RB_TRAV *, const void *);
+
+/* debug tree from given node downwards */
+int rbtree_debug(struct RB_TREE *, struct RB_NODE *);

Modified: grass/trunk/lib/segment/release.c
===================================================================
--- grass/trunk/lib/segment/release.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/release.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,7 +9,7 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <stdlib.h>

Modified: grass/trunk/lib/segment/seek.c
===================================================================
--- grass/trunk/lib/segment/seek.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/seek.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,15 +9,15 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
+#include <grass/config.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
-#include <grass/config.h>
 #include <grass/gis.h>
 #include <grass/segment.h>
 
@@ -34,11 +34,23 @@
  * \return -1 if unable to seek
  */
 
-int segment_seek(const SEGMENT * SEG, int n, int index)
+int segment_seek_fast(const SEGMENT * SEG, int n, int index)
 {
+    off_t offset = (((off_t) n) << SEG->sizebits) + index + SEG->offset;
+
+    if (lseek(SEG->fd, offset, SEEK_SET) == (off_t) - 1) {
+	G_warning("segment_seek: %s", strerror(errno));
+	return -1;
+    }
+
+    return 0;
+}
+
+int segment_seek_slow(const SEGMENT * SEG, int n, int index)
+{
     off_t offset;
 
-    offset = (off_t) n *SEG->size + index + SEG->offset;
+    offset = (off_t) n * SEG->size + index + SEG->offset;
 
     if (lseek(SEG->fd, offset, SEEK_SET) == (off_t) - 1) {
 	G_warning("segment_seek: %s", strerror(errno));
@@ -47,3 +59,12 @@
 
     return 0;
 }
+
+static int (*segment_seek_mode[2]) (const SEGMENT *, int, int) = {
+    segment_seek_fast, segment_seek_slow	/* indexed by SEG->slow_seek */
+};
+
+int segment_seek(const SEGMENT * SEG, int n, int index)
+{
+    return (*segment_seek_mode[SEG->slow_seek]) (SEG, n, index);
+}

Modified: grass/trunk/lib/segment/segmentlib.dox
===================================================================
--- grass/trunk/lib/segment/segmentlib.dox	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/segmentlib.dox	2009-10-21 15:49:13 UTC (rev 39602)
@@ -37,7 +37,7 @@
 code and requires no significant amount of memory to hold the data. But the 
 i/o involved will certainly degrade performance. Method (3) is a mixture of 
 (1) and (2) . Memory requirements are fixed and data is read from the data 
-file only when not already in memory. Howev er the programming is more 
+file only when not already in memory. However the programming is more 
 complex.
 
 <P>
@@ -87,11 +87,13 @@
 
 
 <P>
-int segment_format (int fd, int nrows, int ncols, int srows, int scols,
-  int len) format a segment fileThe segmentation routines require a disk file
-  to be used for paging segments in and out of memory. This routine formats the
-  file open for write on file descriptor <B>fd</B> for use as a segment file.
-  A segment file must be formatted before it can be processed by other segment
+<I>int segment_format (int fd, int nrows, int ncols, int srows, int scols,
+  int len)</I>, format a segment file
+<P>
+  The segmentation routines require a disk file to be used for paging
+  segments in and out of memory. This routine formats the file open for
+  write on file descriptor <B>fd</B> for use as a segment file. A segment
+  file must be formatted before it can be processed by other segment
   routines. The configuration parameters <B>nrows, ncols, srows, scols</B>,
   and <B>len</B> are written to the beginning of the segment file which is
   then filled with zeros.
@@ -112,8 +114,10 @@
 segment file formatted by <I>segment_format.</I>
 
 <P>
-int segment_init (SEGMENT *seg, int fd, int nsegs) initialize segment
-  structureInitializes the <B>seg</B> structure. The file on <B>fd</B> is
+<I>int segment_init (SEGMENT *seg, int fd, int nsegs)</I>, initialize segment
+  structure
+<P>
+  Initializes the <B>seg</B> structure. The file on <B>fd</B> is
   a segment file created by <I>segment_format</I> and must be open for
   reading and writing. The segment file configuration parameters <I>nrows,
     ncols, srows, scols</I>, and <I>len</I>, as written to the file by
@@ -132,8 +136,10 @@
 Then data can be written from another file to the segment file row by row:
 
 <P>
-int segment_put_row (SEGMENT *seg, char *buf, int row) write row to
-  segment fileTransfers nonsegmented matrix data, row by row, into a segment
+<I>int segment_put_row (SEGMENT *seg, char *buf, int row)</I>, write row to
+  segment file
+<P>
+  Transfers nonsegmented matrix data, row by row, into a segment
   file.  <B>Seg</B> is the segment structure that was configured from a call
   to <I>segment_init.</I> <B>Buf</B> should contain <I>ncols*len</I>
   bytes of data to be transferred to the segment file. <B>Row</B> specifies
@@ -146,8 +152,10 @@
 Then data can be read or written to the segment file randomly:
 
 <P>
-int segment_get (SEGMENT *seg, char *value, int row, int col) get value
-  from segment fileProvides random read access to the segmented data. It gets
+<I>int segment_get (SEGMENT *seg, char *value, int row, int col)</I>, get value
+  from segment file
+<P>
+  Provides random read access to the segmented data. It gets
   <I>len</I> bytes of data into <B>value</B> from the segment file
   <B>seg</B> for the corresponding <B>row</B> and <B>col</B> in the
   original data matrix.
@@ -156,8 +164,10 @@
 Return codes are:  1 if ok;  else -1 could not seek or read segment file.
 
 <P>
-int segment_put (SEGMENT *seg, char *value, int row, int col) put
-  value to segment fileProvides random write access to the segmented data. It
+<I>int segment_put (SEGMENT *seg, char *value, int row, int col)</I>, put
+  value to segment file
+<P>
+  Provides random write access to the segmented data. It
   copies <I>len</I> bytes of data from <B>value</B> into the segment
   structure <B>seg</B> for the corresponding <B>row</B> and <B>col</B> in
   the original data matrix.
@@ -169,26 +179,27 @@
 <P>
 Return codes are: 1 if ok; else -1 could not seek or write segment file.
 
-
-
 <P>
 After random reading and writing is finished, the pending updates must be 
 flushed to disk:
 
 <P>
-int segment_flush (SEGMENT *seg) flush pending updates to diskForces
-  all pending updates generated by <I>segment_put()</I> to be written to the
-  segment file <B>seg.</B> Must be called after the final segment_put() to
-  force all pending updates to disk. Must also be called before the first call
-  to <I>segment_get_row.</I>
+<I>int segment_flush (SEGMENT *seg)</I>, flush pending updates to disk
+<P>
+  Forces all pending updates generated by <I>segment_put()</I> to be
+  written to the segment file <B>seg.</B> Must be called after the final
+  segment_put() to force all pending updates to disk. Must also be called
+  before the first call to <I>segment_get_row.</I>
 
 <P>
 Now the data in segment file can be read row by row and transferred to a normal
 sequential data file:
 
 <P>
-int segment_get_row (SEGMENT *seg, char *buf, int row) read row from
-  segment fileTransfers data from a segment file, row by row, into memory
+<I>int segment_get_row (SEGMENT *seg, char *buf, int row)</I>, read row from
+  segment file
+<P>
+  Transfers data from a segment file, row by row, into memory
   (which can then be written to a regular matrix file) . <B>Seg</B> is the
   segment structure that was configured from a call to <I>segment_init.</I>
   <B>Buf</B> will be filled with <I>ncols*len</I> bytes of data
@@ -201,10 +212,11 @@
 Finally, memory allocated in the SEGMENT structure is freed:
 
 <P>
-int segment_release (SEGMENT *seg) free allocated memoryReleases the
-  allocated memory associated with the segment file <B>seg.</B> Does not close
-  the file. Does not flush the data which may be pending from previous
-  <I>segment_put()</I> calls.
+<I>int segment_release (SEGMENT *seg)</I>, free allocated memory
+<P>
+  Releases the allocated memory associated with the segment file
+  <B>seg.</B> Does not close the file. Does not flush the data which may
+  be pending from previous <I>segment_put()</I> calls.
 
 <P>
 
@@ -328,10 +340,24 @@
 
 <P>
 
+\section Segment_Library_Performance Segment Library Performance
+
+Performance of the <I>Segment Library</I> routines can be improved by
+about 10% if <B>srows, scols</B> are each powers of 2; in this case a
+faster alternative is used to access the segment file. An additional
+improvement can be achieved if <B>len</B> is also a power of 2. For
+highly random and scattered access to a large dataset, smaller segments,
+i.e. values for <B>srows, scols</B> of 32, 64, or 128, seem to provide
+better performance than e.g. srows = nrows / 4 + 1.
+
 \section Loading_the_Segment_Library Loading the Segment Library
 
 <P>
-The library is loaded by specifying $(SEGMENTLIB) in the Makefile.
+The library is loaded by specifying
+\verbatim
+$(SEGMENTLIB)
+\endverbatim
+in the Makefile.
 
 <P>
 See \ref Compiling_and_Installing_GRASS_Modules for a complete 

Modified: grass/trunk/lib/segment/setup.c
===================================================================
--- grass/trunk/lib/segment/setup.c	2009-10-21 15:48:12 UTC (rev 39601)
+++ grass/trunk/lib/segment/setup.c	2009-10-21 15:49:13 UTC (rev 39602)
@@ -9,15 +9,18 @@
  *
  * \author GRASS GIS Development Team
  *
- * \date 2005-2006
+ * \date 2005-2009
  */
 
 #include <stdlib.h>
 #include <stdio.h>
+#include <math.h>
 #include <grass/gis.h>
 #include <grass/segment.h>
 
+#include "rbtree.h"
 
+
 /**
  * \fn int segment_setup (SEGMENT *SEG)
  *
@@ -41,7 +44,7 @@
     if (SEG->nrows <= 0 || SEG->ncols <= 0
 	|| SEG->srows <= 0 || SEG->scols <= 0
 	|| SEG->len <= 0 || SEG->nseg <= 0) {
-	G_warning("segment_setup: illegal segment file parameters\n");
+	G_warning("segment_setup: illegal segment file parameters");
 	return -1;
     }
 
@@ -53,23 +56,93 @@
     if (SEG->spill)
 	SEG->spr++;
 
+    /* fast address */
+    SEG->slow_adrs = 1;
+    if (SEG->scols - pow(2, (log(SEG->scols) / log(2))) == 0) {
+	if (SEG->srows - pow(2, (log(SEG->srows) / log(2))) == 0) {
+	    SEG->scolbits = log(SEG->scols) / log(2);
+	    SEG->srowbits = log(SEG->srows) / log(2);
+	    SEG->segbits = SEG->srowbits + SEG->scolbits;
+	    SEG->slow_adrs = 0;
+	    G_debug(1, "segment lib: fast address activated");
+	}
+    }
+    /* fast seek */
+    SEG->slow_seek = 1;
+    if (SEG->slow_adrs == 0) {
+	if (SEG->len - pow(2, (log(SEG->len) / log(2))) == 0) {
+	    SEG->lenbits = log(SEG->len) / log(2);
+	    SEG->sizebits = SEG->segbits + SEG->lenbits;
+	    SEG->slow_seek = 0;
+	    G_debug(1, "segment lib: fast seek activated");
+	}
+    }
+
+    /* adjust number of open segments if larger than number of total segments */
+    if (SEG->nseg > SEG->spr * ((SEG->nrows + SEG->srows - 1) / SEG->srows)) {
+	G_warning("segment: reducing number of open segments from %d to %d",
+		   SEG->nseg, SEG->spr * ((SEG->nrows + SEG->srows - 1) / SEG->srows));
+	SEG->nseg = SEG->spr * ((SEG->nrows + SEG->srows - 1) / SEG->srows);
+    }
+
     if ((SEG->scb =
 	 (struct SEGMENT_SCB *)G_malloc(SEG->nseg *
 					sizeof(struct SEGMENT_SCB))) == NULL)
 	return -2;
 
-    SEG->size = SEG->srows * SEG->scols * SEG->len;
+    if ((SEG->freeslot = (int *)G_malloc(SEG->nseg * sizeof(int))) == NULL)
+	return -2;
 
+    if ((SEG->agequeue = (struct aq *)G_malloc((SEG->nseg + 1) * sizeof(struct aq))) == NULL)
+	return -2;
+
+    SEG->srowscols = SEG->srows * SEG->scols;
+    SEG->size = SEG->srowscols * SEG->len;
+    
     for (i = 0; i < SEG->nseg; i++) {
 	if ((SEG->scb[i].buf = G_malloc(SEG->size)) == NULL)
 	    return -2;
 
 	SEG->scb[i].n = -1;	/* mark free */
 	SEG->scb[i].dirty = 0;
-	SEG->scb[i].age = 0;
+	SEG->scb[i].age = NULL;
+	SEG->freeslot[i] = i;
+	SEG->agequeue[i].cur = -1;
+	if (i > 0) {
+	    SEG->agequeue[i].younger = &(SEG->agequeue[i - 1]);
+	    SEG->agequeue[i].older = &(SEG->agequeue[i + 1]);
+	}
+	else if (i == 0) {
+	    SEG->agequeue[i].younger = &(SEG->agequeue[SEG->nseg]);
+	    SEG->agequeue[i].older = &(SEG->agequeue[i + 1]);
+	}
     }
+    
+    SEG->agequeue[SEG->nseg].cur = -1;
+    SEG->agequeue[SEG->nseg].younger = &(SEG->agequeue[SEG->nseg - 1]);
+    SEG->agequeue[SEG->nseg].older = &(SEG->agequeue[0]);
+    SEG->youngest = SEG->oldest = &(SEG->agequeue[SEG->nseg]);
+    
+    SEG->nfreeslots = SEG->nseg;
     SEG->cur = 0;
     SEG->open = 1;
+    
+    SEG->loaded = rbtree_create(segment_compare, sizeof(SEGID));
 
     return 1;
 }
+/* Comparison callback given to rbtree_create(): orders SEGID items by n. */
+int segment_compare(const void *sega, const void *segb)
+{
+    SEGID *a, *b;
+    /* view the untyped arguments as SEGID items */
+    a = (SEGID *)sega;
+    b = (SEGID *)segb;
+    /* result: 1 if a->n > b->n, -1 if a->n < b->n, 0 if they are equal */
+    if (a->n > b->n)
+	return 1;
+    else if (a->n < b->n)
+	return -1;
+    
+    return 0;
+}



More information about the grass-commit mailing list