[mapserver-commits] r7528 - trunk/mapserver

svn at osgeo.org svn at osgeo.org
Fri Apr 25 17:32:01 EDT 2008


Author: pramsey
Date: 2008-04-25 17:32:01 -0400 (Fri, 25 Apr 2008)
New Revision: 7528

Modified:
   trunk/mapserver/HISTORY.TXT
   trunk/mapserver/mapbits.c
   trunk/mapserver/mapserver.h
   trunk/mapserver/mapshape.c
   trunk/mapserver/mapshape.h
   trunk/mapserver/maptree.c
Log:
Large shape file performance improvements.  Lazy-load SHX files.  Faster scanning of the status bitmap. (#2282)


Modified: trunk/mapserver/HISTORY.TXT
===================================================================
--- trunk/mapserver/HISTORY.TXT	2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/HISTORY.TXT	2008-04-25 21:32:01 UTC (rev 7528)
@@ -13,6 +13,8 @@
 Current Version (5.1-dev, SVN trunk):
 -------------------------------------
 
+- Improve performance for large shape files (#2282)
+
 - encode WMS parameters correctly (#1296)
 
 - Added alignment option within a scalebar (#2468)

Modified: trunk/mapserver/mapbits.c
===================================================================
--- trunk/mapserver/mapbits.c	2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapbits.c	2008-04-25 21:32:01 UTC (rev 7528)
@@ -1,5 +1,5 @@
 /******************************************************************************
- * $Id:$
+ * $Id$
  *
  * Project:  MapServer
  * Purpose:  Implementation of bit array functions.
@@ -53,6 +53,47 @@
   return (*array & (1 << (index % CHAR_BIT))) != 0;    /* 0 or 1 */
 }
 
+/*
+** msGetNextBit( array, index, size )
+**
+** Find the first set bit at or after position "index" in a bit array that
+** holds "size" bits. If index == 0 and bit 0 is set, returns 0. Returns -1
+** when no bit in [index, size) is set, including when index >= size.
+**
+** Bytes with nothing set at or above the current position are skipped in
+** one step, so sparse bitmaps are scanned bytewise rather than bitwise.
+** No byte at or beyond bit "size" is ever read.
+*/
+int msGetNextBit(char *array, int index, int size) {
+  int i = index;
+  int shift;
+  unsigned char byte;
+
+  /* A negative start would index memory in front of the array. */
+  if( i < 0 )
+    i = 0;
+
+  while( i < size ) {
+    shift = i % CHAR_BIT;
+    /* Go through unsigned char so a set top bit is not sign-extended. */
+    byte = (unsigned char) array[i / CHAR_BIT];
+
+    if( (byte >> shift) == 0 ) {
+      /* nothing set at or above this position: jump to the next byte */
+      i += CHAR_BIT - shift;
+    } else if( (byte >> shift) & 1 ) {
+      /* the bit we are standing on is set */
+      return i;
+    } else {
+      /* a higher bit in this byte is set, keep walking towards it */
+      i++;
+    }
+  }
+
+  /* got to the end of the bitmap with no hits! */
+  return -1;
+}
+
 void msSetBit(char *array, int index, int value)
 {
   array += index / CHAR_BIT;

Modified: trunk/mapserver/mapserver.h
===================================================================
--- trunk/mapserver/mapserver.h	2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapserver.h	2008-04-25 21:32:01 UTC (rev 7528)
@@ -1722,6 +1722,7 @@
 MS_DLL_EXPORT int msGetBit(char *array, int index);
 MS_DLL_EXPORT void msSetBit(char *array, int index, int value);
 MS_DLL_EXPORT void msFlipBit(char *array, int index);
+MS_DLL_EXPORT int msGetNextBit(char *array, int index, int size);
 
 MS_DLL_EXPORT int msLayerInitItemInfo(layerObj *layer);
 MS_DLL_EXPORT void msLayerFreeItemInfo(layerObj *layer); 

Modified: trunk/mapserver/mapshape.c
===================================================================
--- trunk/mapserver/mapshape.c	2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapshape.c	2008-04-25 21:32:01 UTC (rev 7528)
@@ -339,17 +339,23 @@
   /* -------------------------------------------------------------------- */
   psSHP->nMaxRecords = psSHP->nRecords;
   
+  /* Our in-memory cache of offset information */
   psSHP->panRecOffset = (int *) malloc(sizeof(int) * psSHP->nMaxRecords );
+  /* Our in-memory cache of size information */
   psSHP->panRecSize = (int *) malloc(sizeof(int) * psSHP->nMaxRecords );
+  /* The completeness information for our in-memory cache */
+  psSHP->panRecLoaded = msAllocBitArray( 1 + (psSHP->nMaxRecords / SHX_BUFFER_PAGE) ) ;
+  /* Is our in-memory cache completely populated? */
+  psSHP->panRecAllLoaded = 0; 
   
-  pabyBuf = (uchar *) malloc(8 * psSHP->nRecords );
+  /* malloc failed? clean up and shut down */  
   if (psSHP->panRecOffset == NULL ||
       psSHP->panRecSize == NULL ||
-      pabyBuf == NULL)
+      psSHP->panRecLoaded == NULL)
   {
     free(psSHP->panRecOffset);
-    free(psSHP->panRecOffset);
-    free( pabyBuf );
+    free(psSHP->panRecSize);
+    free(psSHP->panRecLoaded);
     fclose( psSHP->fpSHP );
     fclose( psSHP->fpSHX );
     free( psSHP );
@@ -357,22 +363,7 @@
     return( NULL );
   }
 
-  fread( pabyBuf, 8, psSHP->nRecords, psSHP->fpSHX );
   
-  for( i = 0; i < psSHP->nRecords; i++ ) {
-    ms_int32 nOffset, nLength;
-    
-    memcpy( &nOffset, pabyBuf + i * 8, 4 );
-    if( !bBigEndian ) SwapWord( 4, &nOffset );
-    
-    memcpy( &nLength, pabyBuf + i * 8 + 4, 4 );
-    if( !bBigEndian ) SwapWord( 4, &nLength );
-    
-    psSHP->panRecOffset[i] = nOffset*2;
-    psSHP->panRecSize[i] = nLength*2;
-  }
-  free( pabyBuf );
-  
   return( psSHP );
 }
 
@@ -394,7 +385,9 @@
   /* -------------------------------------------------------------------- */
   free( psSHP->panRecOffset );
   free( psSHP->panRecSize );
+  free( psSHP->panRecLoaded );
   
+  
   if(psSHP->pabyRec) free(psSHP->pabyRec);
   if(psSHP->panParts) free(psSHP->panParts);
 
@@ -571,6 +564,9 @@
 
   psSHP->bUpdated = MS_TRUE;
 
+  /* Fill the SHX buffer if it is not already full. */
+  if( ! psSHP->panRecAllLoaded ) msSHXLoadAll( psSHP );
+
   /* -------------------------------------------------------------------- */
   /*      Add the new entity to the in memory index.                      */
   /* -------------------------------------------------------------------- */
@@ -660,6 +656,9 @@
 #endif
   psSHP->bUpdated = MS_TRUE;
   
+  /* Fill the SHX buffer if it is not already full. */
+  if( ! psSHP->panRecAllLoaded ) msSHXLoadAll( psSHP );
+  
   /* -------------------------------------------------------------------- */
   /*      Add the new entity to the in memory index.                      */
   /* -------------------------------------------------------------------- */
@@ -962,7 +961,8 @@
  */
 static int msSHPReadAllocateBuffer( SHPHandle psSHP, int hEntity, const char* pszCallingFunction)
 {
-  int nEntitySize = psSHP->panRecSize[hEntity]+8;
+
+  int nEntitySize = msSHXReadSize(psSHP, hEntity) + 8;
   /* -------------------------------------------------------------------- */
   /*      Ensure our record buffer is large enough.                       */
   /* -------------------------------------------------------------------- */
@@ -1010,9 +1010,9 @@
     return(MS_FAILURE);
   }
 
-  nEntitySize = psSHP->panRecSize[hEntity]+8;
+  nEntitySize = msSHXReadSize( psSHP, hEntity) + 8;
 
-  if( psSHP->panRecSize[hEntity] == 4 ) {
+  if( msSHXReadSize( psSHP, hEntity) == 4 ) {
     msSetError(MS_SHPERR, "NULL feature encountered.", "msSHPReadPoint()");
     return(MS_FAILURE);
   }
@@ -1030,7 +1030,7 @@
   /* -------------------------------------------------------------------- */
   /*      Read the record.                                                */
   /* -------------------------------------------------------------------- */
-  fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity], 0 );
+  fseek( psSHP->fpSHP, msSHXReadOffset( psSHP, hEntity), 0 );
   fread( psSHP->pabyRec, nEntitySize, 1, psSHP->fpSHP );
       
   memcpy( &(point->x), psSHP->pabyRec + 12, 8 );
@@ -1045,6 +1045,118 @@
 }
 
 /*
+** msSHXLoadPage() 
+**
+** The SHX tells us what the byte offsets of the shapes in the SHP file are.
+** We read the SHX file in pages of SHX_BUFFER_PAGE records (8 bytes each)
+** and cache each page in panRecOffset/panRecSize for later accesses. A page
+** is flagged in panRecLoaded only once all of its records were read.
+** Returns MS_SUCCESS, or MS_FAILURE for a bad page number or failed read.
+*/
+int msSHXLoadPage( SHPHandle psSHP, int shxBufferPage )
+{
+  int i;
+  int firstRecord;  /* index of the first record held by this page */
+  size_t nRead;     /* records actually delivered by fread() */
+  /* Each SHX record is 8 bytes long (two ints), hence our buffer size. */
+  char buffer[SHX_BUFFER_PAGE * 8];
+
+  /* Validate the page number against the record count. */
+  if( shxBufferPage < 0 || shxBufferPage > psSHP->nRecords / SHX_BUFFER_PAGE )
+    return(MS_FAILURE);
+  firstRecord = shxBufferPage * SHX_BUFFER_PAGE;
+
+  /* The SHX file starts with 100 bytes of header, skip that. */
+  if( fseek( psSHP->fpSHX, 100 + firstRecord * 8, 0 ) != 0 )
+    return(MS_FAILURE);
+  nRead = fread( buffer, 8, SHX_BUFFER_PAGE, psSHP->fpSHX );
+
+  /* Copy the buffer contents out into the working arrays. */
+  for( i = 0; i < SHX_BUFFER_PAGE; i++ ) {
+    ms_int32 tmpOffset, tmpSize;
+
+    /* Don't write information past the end of the arrays, please. */
+    if( psSHP->nRecords <= firstRecord + i )
+      break;
+
+    /* A short read means the rest of the buffer is not file data. */
+    if( (size_t) i >= nRead )
+      return(MS_FAILURE);
+
+    memcpy( &tmpOffset, (buffer + (8*i)), 4);
+    memcpy( &tmpSize, (buffer + (8*i) + 4), 4);
+    /* SHX numbers are big endian; flip them on little endian machines. */
+    if( !bBigEndian ) SwapWord( 4, &tmpOffset );
+    if( !bBigEndian ) SwapWord( 4, &tmpSize );
+
+    /* SHX counts in 2 byte units, so double to get byte values. */
+    psSHP->panRecOffset[firstRecord + i] = tmpOffset * 2;
+    psSHP->panRecSize[firstRecord + i] = tmpSize * 2;
+  }
+
+  msSetBit(psSHP->panRecLoaded, shxBufferPage, 1);
+  return(MS_SUCCESS);
+}
+
+/* Load every SHX record into the in-memory index. MS_SUCCESS, or MS_FAILURE on error. */
+int msSHXLoadAll( SHPHandle psSHP ) {
+  int i;
+  uchar *pabyBuf = (uchar *) malloc(8 * psSHP->nRecords );
+  /* Records follow the 100 byte header; page loads may have moved the file position. */
+  if( (pabyBuf == NULL && psSHP->nRecords > 0) || fseek( psSHP->fpSHX, 100, 0 ) != 0 ||
+      fread( pabyBuf, 8, psSHP->nRecords, psSHP->fpSHX ) != (size_t) psSHP->nRecords ) {
+    free(pabyBuf);
+    return(MS_FAILURE);
+  }
+  for( i = 0; i < psSHP->nRecords; i++ ) {
+    ms_int32 nOffset, nLength;
+    memcpy( &nOffset, pabyBuf + i * 8, 4 );
+    if( !bBigEndian ) SwapWord( 4, &nOffset );
+    memcpy( &nLength, pabyBuf + i * 8 + 4, 4 );
+    if( !bBigEndian ) SwapWord( 4, &nLength );
+    psSHP->panRecOffset[i] = nOffset*2; /* SHX stores 2 byte units */
+    psSHP->panRecSize[i] = nLength*2;
+  }
+  free(pabyBuf);
+  psSHP->panRecAllLoaded = 1; /* cache is complete; callers need not reload */
+  return(MS_SUCCESS);
+}
+
+/* Byte offset of record hEntity in the SHP file; MS_FAILURE if it cannot be determined. */
+int msSHXReadOffset( SHPHandle psSHP, int hEntity ) {
+  int shxBufferPage;
+
+  /*  Validate the record/entity number. */
+  if( hEntity < 0 || hEntity >= psSHP->nRecords )
+    return(MS_FAILURE);
+
+  /* Read from disk only if neither the whole index nor this page is cached yet. */
+  shxBufferPage = hEntity / SHX_BUFFER_PAGE;
+  if( ! psSHP->panRecAllLoaded && ! msGetBit(psSHP->panRecLoaded, shxBufferPage) ) {
+    if( msSHXLoadPage( psSHP, shxBufferPage ) != MS_SUCCESS ) return(MS_FAILURE);
+  }
+  return psSHP->panRecOffset[hEntity];
+}
+
+/* Content size in bytes of record hEntity; MS_FAILURE if it cannot be determined. */
+int msSHXReadSize( SHPHandle psSHP, int hEntity ) {
+  int shxBufferPage;
+
+  /*  Validate the record/entity number. */
+  if( hEntity < 0 || hEntity >= psSHP->nRecords )
+    return(MS_FAILURE);
+
+  /* Read from disk only if neither the whole index nor this page is cached yet. */
+  shxBufferPage = hEntity / SHX_BUFFER_PAGE;
+  if( ! psSHP->panRecAllLoaded && ! msGetBit(psSHP->panRecLoaded, shxBufferPage) ) {
+    if( msSHXLoadPage( psSHP, shxBufferPage ) != MS_SUCCESS ) return(MS_FAILURE);
+  }
+  return psSHP->panRecSize[hEntity];
+}
+
+
+
+/*
 ** msSHPReadShape() - Reads the vertices for one shape from a shape file.
 */
 void msSHPReadShape( SHPHandle psSHP, int hEntity, shapeObj *shape )
@@ -1063,12 +1175,12 @@
   if( hEntity < 0 || hEntity >= psSHP->nRecords )
     return;
 
-  if( psSHP->panRecSize[hEntity] == 4 ) {      
+  if( msSHXReadSize(psSHP, hEntity) == 4 ) {      
     shape->type = MS_SHAPE_NULL;
     return;
   }
 
-  nEntitySize = psSHP->panRecSize[hEntity]+8;
+  nEntitySize = msSHXReadSize(psSHP, hEntity) + 8;
   if (msSHPReadAllocateBuffer(psSHP, hEntity, "msSHPReadShape()") == MS_FAILURE)
   {
     shape->type = MS_SHAPE_NULL;
@@ -1078,7 +1190,7 @@
   /* -------------------------------------------------------------------- */
   /*      Read the record.                                                */
   /* -------------------------------------------------------------------- */
-  fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity], 0 );
+  fseek( psSHP->fpSHP, msSHXReadOffset(psSHP, hEntity), 0 );
   fread( psSHP->pabyRec, nEntitySize, 1, psSHP->fpSHP );
 
   /* -------------------------------------------------------------------- */
@@ -1264,8 +1376,8 @@
     if (nEntitySize < 44 + 4)
     {
       shape->type = MS_SHAPE_NULL;
-      msSetError(MS_SHPERR, "Corrupted feature encountered.  psSHP->panRecSize[%d]=%d", "msSHPReadShape()",
-                 hEntity, psSHP->panRecSize[hEntity]);
+      msSetError(MS_SHPERR, "Corrupted feature encountered.  recSize of feature %d=%d", "msSHPReadShape()",
+                 hEntity, msSHXReadSize(psSHP, hEntity));
       return;
     }
 
@@ -1371,8 +1483,8 @@
     if (nEntitySize < 20 + 8)
     {
       shape->type = MS_SHAPE_NULL;
-      msSetError(MS_SHPERR, "Corrupted feature encountered.  psSHP->panRecSize[%d]=%d", "msSHPReadShape()",
-                 hEntity, psSHP->panRecSize[hEntity]);
+      msSetError(MS_SHPERR, "Corrupted feature encountered.  recSize of feature %d=%d", "msSHPReadShape()",
+                 hEntity, msSHXReadSize(psSHP, hEntity));
       return;
     }
 
@@ -1453,13 +1565,14 @@
     padBounds->maxx = psSHP->adBoundsMax[0];
     padBounds->maxy = psSHP->adBoundsMax[1];
   } else {    
-    if( psSHP->panRecSize[hEntity] == 4 ) { /* NULL shape */
+    
+    if( msSHXReadSize(psSHP, hEntity) == 4 ) { /* NULL shape */
       padBounds->minx = padBounds->miny = padBounds->maxx = padBounds->maxy = 0.0;
       return MS_FAILURE;
     } 
     
     if( psSHP->nShapeType != SHP_POINT && psSHP->nShapeType != SHP_POINTZ && psSHP->nShapeType != SHP_POINTM) {
-      fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity]+12, 0 );
+      fseek( psSHP->fpSHP, msSHXReadOffset(psSHP, hEntity) + 12, 0 );
       fread( padBounds, sizeof(double)*4, 1, psSHP->fpSHP );
 
       if( bBigEndian ) {
@@ -1479,7 +1592,7 @@
       /*      minimum and maximum bound.                                      */
       /* -------------------------------------------------------------------- */
       
-      fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity]+12, 0 );
+      fseek( psSHP->fpSHP, msSHXReadOffset(psSHP, hEntity) + 12, 0 );
       fread( padBounds, sizeof(double)*2, 1, psSHP->fpSHP );
       
       if( bBigEndian ) {
@@ -1632,8 +1745,9 @@
     shpfile->status = msSearchDiskTree(filename, rect, debug);
     free(filename);
 
-    if(shpfile->status) /* index  */
+    if(shpfile->status) { /* index  */
       msFilterTreeSearch(shpfile, shpfile->status, rect);
+    }
     else { /* no index  */
       shpfile->status = msAllocBitArray(shpfile->numshapes);
       if(!shpfile->status) {
@@ -2303,18 +2417,15 @@
 
   /* now apply the maxshapes criteria (NOTE: this ignores the filter so you could get less than maxfeatures) */
   if(layer->maxfeatures > 0) {
-    for(i=0; i<shpfile->numshapes; i++) {
-      n1 += msGetBit(shpfile->status,i);
-    }
 
-    if(n1 > layer->maxfeatures) {
-      for(i=0; i<shpfile->numshapes; i++) {
-        if(msGetBit(shpfile->status,i) && (n2 < (n1 - layer->maxfeatures))) {
-          msSetBit(shpfile->status,i,0);
-          n2++;
-        }
+    for( i = (shpfile->numshapes - 1); i >= 0; i-- ) {
+      n2 = msGetBit(shpfile->status, i);
+      n1 += n2;
+      if( n2 && n1 > layer->maxfeatures ) {
+        msSetBit(shpfile->status, i, 0);
       }
     }
+
   }
     
   return MS_SUCCESS;
@@ -2331,14 +2442,14 @@
   if(!shpfile) {
     msSetError(MS_SHPERR, "Shapefile layer has not been opened.", "msLayerNextShape()");
     return MS_FAILURE;
-  }
+  }    
+  
+  do {
+    i = msGetNextBit(shpfile->status, shpfile->lastshape + 1, shpfile->numshapes);
 
-  do {
-    i = shpfile->lastshape + 1;
-    while(i<shpfile->numshapes && !msGetBit(shpfile->status,i)) i++; /* next "in" shape */
     shpfile->lastshape = i;
 
-    if(i == shpfile->numshapes) return(MS_DONE); /* nothing else to read */
+    if(i == -1) return(MS_DONE); /* nothing else to read */
 
     filter_passed = MS_TRUE;  /* By default accept ANY shape */
     if(layer->numitems > 0 && layer->iteminfo) {

Modified: trunk/mapserver/mapshape.h
===================================================================
--- trunk/mapserver/mapshape.h	2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapshape.h	2008-04-25 21:32:01 UTC (rev 7528)
@@ -37,6 +37,8 @@
 extern "C" {
 #endif
 
+#define SHX_BUFFER_PAGE 1024
+
 #ifndef SWIG
 #define MS_PATH_LENGTH 1024
 
@@ -84,8 +86,11 @@
 
     int		nRecords;
     int		nMaxRecords;
+
     int		*panRecOffset;
     int		*panRecSize;
+    char  *panRecLoaded;
+    int   panRecAllLoaded;
 
     double	adBoundsMin[4];
     double	adBoundsMax[4];
@@ -101,6 +106,8 @@
 typedef SHPInfo * SHPHandle;
 #endif
 
+
+
 typedef	struct
 {
 #ifdef SWIG
@@ -175,7 +182,6 @@
   int tilelayerindex;
 } msTiledSHPLayerInfo;
 
-
 /* shapefileObj function prototypes  */
 MS_DLL_EXPORT int msShapefileOpen(shapefileObj *shpfile, char *mode, char *filename);
 MS_DLL_EXPORT int msShapefileCreate(shapefileObj *shpfile, char *filename, int type);
@@ -192,7 +198,13 @@
 MS_DLL_EXPORT int msSHPReadPoint(SHPHandle psSHP, int hEntity, pointObj *point );
 MS_DLL_EXPORT int msSHPWriteShape( SHPHandle psSHP, shapeObj *shape );
 MS_DLL_EXPORT int msSHPWritePoint(SHPHandle psSHP, pointObj *point );
+/* SHX reading */
+MS_DLL_EXPORT int msSHXLoadAll( SHPHandle psSHP );
+MS_DLL_EXPORT int msSHXLoadPage( SHPHandle psSHP, int shxBufferPage );
+MS_DLL_EXPORT int msSHXReadOffset( SHPHandle psSHP, int hEntity );
+MS_DLL_EXPORT int msSHXReadSize( SHPHandle psSHP, int hEntity );
 
+
 /* tiledShapefileObj function prototypes are in mapserver.h */
 
 /* XBase function prototypes */

Modified: trunk/mapserver/maptree.c
===================================================================
--- trunk/mapserver/maptree.c	2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/maptree.c	2008-04-25 21:32:01 UTC (rev 7528)
@@ -1,5 +1,5 @@
 /******************************************************************************
- * $Id:$
+ * $Id$
  *
  * Project:  MapServer
  * Purpose:  .qix spatial index implementation.  Derived from shapelib, and 
@@ -768,10 +768,14 @@
   int i;
   rectObj shape_rect;
 
-  for(i=0;i<shp->numshapes;i++) { /* for each shape */
-    if(msGetBit(status, i)) {
-      if(msSHPReadBounds(shp->hSHP, i, &shape_rect) == MS_SUCCESS)
-	if(msRectOverlap(&shape_rect, &search_rect) != MS_TRUE) msSetBit(status, i, 0);
+  i = msGetNextBit(status, 0, shp->numshapes);
+  while(i >= 0) {
+    if(msSHPReadBounds(shp->hSHP, i, &shape_rect) == MS_SUCCESS) {
+	    if(msRectOverlap(&shape_rect, &search_rect) != MS_TRUE) {
+	      msSetBit(status, i, 0);
+      }
     }
+    i = msGetNextBit(status, i+1, shp->numshapes);
   }
+
 }



More information about the mapserver-commits mailing list