[mapserver-commits] r7528 - trunk/mapserver
svn at osgeo.org
svn at osgeo.org
Fri Apr 25 17:32:01 EDT 2008
Author: pramsey
Date: 2008-04-25 17:32:01 -0400 (Fri, 25 Apr 2008)
New Revision: 7528
Modified:
trunk/mapserver/HISTORY.TXT
trunk/mapserver/mapbits.c
trunk/mapserver/mapserver.h
trunk/mapserver/mapshape.c
trunk/mapserver/mapshape.h
trunk/mapserver/maptree.c
Log:
Large shape file performance improvements. Lazy-load SHX files. Faster scanning of the status bitmap. (#2282)
Modified: trunk/mapserver/HISTORY.TXT
===================================================================
--- trunk/mapserver/HISTORY.TXT 2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/HISTORY.TXT 2008-04-25 21:32:01 UTC (rev 7528)
@@ -13,6 +13,8 @@
Current Version (5.1-dev, SVN trunk):
-------------------------------------
+- Improve performance for large shape files (#2282)
+
- encode WMS parameters correctly (#1296)
- Added alignment option within a scalebar (#2468)
Modified: trunk/mapserver/mapbits.c
===================================================================
--- trunk/mapserver/mapbits.c 2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapbits.c 2008-04-25 21:32:01 UTC (rev 7528)
@@ -1,5 +1,5 @@
/******************************************************************************
- * $Id:$
+ * $Id$
*
* Project: MapServer
* Purpose: Implementation of bit array functions.
@@ -53,6 +53,47 @@
return (*array & (1 << (index % CHAR_BIT))) != 0; /* 0 or 1 */
}
+/*
+** msGetNextBit( array, index, size )
+**
+** Find the first set bit at a position >= index in a bitmap that holds
+** size bits. If the bit at index is itself set, index is returned (so a
+** start of 0 with bit 0 set returns 0). Returns -1 when there is no set
+** bit in [index, size), or when index lies outside the bitmap.
+**
+** Only bytes that hold bits below size are ever read, and a bit at or
+** beyond size is never reported, so stray bits in the padding of the
+** last byte are ignored.
+*/
+int msGetNextBit(char *array, int index, int size) {
+  int i;
+  int stop;
+
+  /* Nothing to scan: no bitmap, or the start position is out of range. */
+  if( !array || index < 0 || index >= size )
+    return -1;
+
+  /* Finish the (possibly partial) byte that contains the start bit. */
+  stop = index + CHAR_BIT - (index % CHAR_BIT);
+  if( stop > size ) stop = size;
+  for( i = index; i < stop; i++ ) {
+    if( msGetBit( array, i ) )
+      return i;
+  }
+
+  /* Scroll forwards bytewise to the next byte with a bit set. Here i is */
+  /* either byte aligned or equal to size, so no byte past the bitmap is */
+  /* ever dereferenced. */
+  while( i < size && array[i / CHAR_BIT] == 0 )
+    i += CHAR_BIT;
+
+  /* Check the first non-zero byte for the location of the set bit, */
+  /* without looking at bits beyond the end of the bitmap. */
+  stop = i + CHAR_BIT;
+  if( stop > size ) stop = size;
+  for( ; i < stop; i++ ) {
+    if( msGetBit( array, i ) )
+      return i;
+  }
+
+  /* got to the end of the bitmap with no hits! */
+  return -1;
+}
+
void msSetBit(char *array, int index, int value)
{
array += index / CHAR_BIT;
Modified: trunk/mapserver/mapserver.h
===================================================================
--- trunk/mapserver/mapserver.h 2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapserver.h 2008-04-25 21:32:01 UTC (rev 7528)
@@ -1722,6 +1722,7 @@
MS_DLL_EXPORT int msGetBit(char *array, int index);
MS_DLL_EXPORT void msSetBit(char *array, int index, int value);
MS_DLL_EXPORT void msFlipBit(char *array, int index);
+MS_DLL_EXPORT int msGetNextBit(char *array, int index, int size);
MS_DLL_EXPORT int msLayerInitItemInfo(layerObj *layer);
MS_DLL_EXPORT void msLayerFreeItemInfo(layerObj *layer);
Modified: trunk/mapserver/mapshape.c
===================================================================
--- trunk/mapserver/mapshape.c 2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapshape.c 2008-04-25 21:32:01 UTC (rev 7528)
@@ -339,17 +339,23 @@
/* -------------------------------------------------------------------- */
psSHP->nMaxRecords = psSHP->nRecords;
+ /* Our in-memory cache of offset information */
psSHP->panRecOffset = (int *) malloc(sizeof(int) * psSHP->nMaxRecords );
+ /* Our in-memory cache of size information */
psSHP->panRecSize = (int *) malloc(sizeof(int) * psSHP->nMaxRecords );
+ /* The completeness information for our in-memory cache */
+ psSHP->panRecLoaded = msAllocBitArray( 1 + (psSHP->nMaxRecords / SHX_BUFFER_PAGE) ) ;
+ /* Is our in-memory cache completely populated? */
+ psSHP->panRecAllLoaded = 0;
- pabyBuf = (uchar *) malloc(8 * psSHP->nRecords );
+ /* malloc failed? clean up and shut down */
if (psSHP->panRecOffset == NULL ||
psSHP->panRecSize == NULL ||
- pabyBuf == NULL)
+ psSHP->panRecLoaded == NULL)
{
free(psSHP->panRecOffset);
- free(psSHP->panRecOffset);
- free( pabyBuf );
+ free(psSHP->panRecSize);
+ free(psSHP->panRecLoaded);
fclose( psSHP->fpSHP );
fclose( psSHP->fpSHX );
free( psSHP );
@@ -357,22 +363,7 @@
return( NULL );
}
- fread( pabyBuf, 8, psSHP->nRecords, psSHP->fpSHX );
- for( i = 0; i < psSHP->nRecords; i++ ) {
- ms_int32 nOffset, nLength;
-
- memcpy( &nOffset, pabyBuf + i * 8, 4 );
- if( !bBigEndian ) SwapWord( 4, &nOffset );
-
- memcpy( &nLength, pabyBuf + i * 8 + 4, 4 );
- if( !bBigEndian ) SwapWord( 4, &nLength );
-
- psSHP->panRecOffset[i] = nOffset*2;
- psSHP->panRecSize[i] = nLength*2;
- }
- free( pabyBuf );
-
return( psSHP );
}
@@ -394,7 +385,9 @@
/* -------------------------------------------------------------------- */
free( psSHP->panRecOffset );
free( psSHP->panRecSize );
+ free( psSHP->panRecLoaded );
+
if(psSHP->pabyRec) free(psSHP->pabyRec);
if(psSHP->panParts) free(psSHP->panParts);
@@ -571,6 +564,9 @@
psSHP->bUpdated = MS_TRUE;
+ /* Fill the SHX buffer if it is not already full. */
+ if( ! psSHP->panRecAllLoaded ) msSHXLoadAll( psSHP );
+
/* -------------------------------------------------------------------- */
/* Add the new entity to the in memory index. */
/* -------------------------------------------------------------------- */
@@ -660,6 +656,9 @@
#endif
psSHP->bUpdated = MS_TRUE;
+ /* Fill the SHX buffer if it is not already full. */
+ if( ! psSHP->panRecAllLoaded ) msSHXLoadAll( psSHP );
+
/* -------------------------------------------------------------------- */
/* Add the new entity to the in memory index. */
/* -------------------------------------------------------------------- */
@@ -962,7 +961,8 @@
*/
static int msSHPReadAllocateBuffer( SHPHandle psSHP, int hEntity, const char* pszCallingFunction)
{
- int nEntitySize = psSHP->panRecSize[hEntity]+8;
+
+ int nEntitySize = msSHXReadSize(psSHP, hEntity) + 8;
/* -------------------------------------------------------------------- */
/* Ensure our record buffer is large enough. */
/* -------------------------------------------------------------------- */
@@ -1010,9 +1010,9 @@
return(MS_FAILURE);
}
- nEntitySize = psSHP->panRecSize[hEntity]+8;
+ nEntitySize = msSHXReadSize( psSHP, hEntity) + 8;
- if( psSHP->panRecSize[hEntity] == 4 ) {
+ if( msSHXReadSize( psSHP, hEntity) == 4 ) {
msSetError(MS_SHPERR, "NULL feature encountered.", "msSHPReadPoint()");
return(MS_FAILURE);
}
@@ -1030,7 +1030,7 @@
/* -------------------------------------------------------------------- */
/* Read the record. */
/* -------------------------------------------------------------------- */
- fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity], 0 );
+ fseek( psSHP->fpSHP, msSHXReadOffset( psSHP, hEntity), 0 );
fread( psSHP->pabyRec, nEntitySize, 1, psSHP->fpSHP );
memcpy( &(point->x), psSHP->pabyRec + 12, 8 );
@@ -1045,6 +1045,118 @@
}
/*
+** msSHXLoadPage()
+**
+** The SHX tells us what the byte offsets of the shapes in the SHP file are.
+** We read the SHX file in ~8K pages and store those pages in memory for
+** successive accesses during the reading cycle (first bounds are read,
+** then entire shapes). Each time we read a page, we mark it as read.
+*/
+int msSHXLoadPage( SHPHandle psSHP, int shxBufferPage )
+{
+ /* Validate the page number. */
+ if( shxBufferPage < 0 )
+ return(MS_FAILURE);
+
+ /* Each SHX record is 8 bytes long (two ints), hence our buffer size. */
+ char buffer[SHX_BUFFER_PAGE * 8];
+
+ /* The SHX file starts with 100 bytes of header, skip that. */
+ fseek( psSHP->fpSHX, 100 + shxBufferPage * SHX_BUFFER_PAGE * 8, 0 );
+ fread( buffer, 8, SHX_BUFFER_PAGE, psSHP->fpSHX );
+
+ /* Copy the buffer contents out into the working arrays. */
+ /* TODO: need to check end case so we don't memcpy too far. */
+ int i = 0;
+ for( i = 0; i < SHX_BUFFER_PAGE; i++ ) {
+ int tmpOffset, tmpSize;
+
+ /* Don't write information past the end of the arrays, please. */
+ if(psSHP->nRecords <= (shxBufferPage * SHX_BUFFER_PAGE + i) )
+ break;
+
+ memcpy( &tmpOffset, (buffer + (8*i)), 4);
+ memcpy( &tmpSize, (buffer + (8*i) + 4), 4);
+
+ /* SHX uses big endian numbers for the offsets, so we have to flip them */
+ /* if we are a little endian machine. */
+ if( !bBigEndian ) SwapWord( 4, &tmpOffset );
+ if( !bBigEndian ) SwapWord( 4, &tmpSize );
+
+ /* SHX stores the offsets in 2 byte units, so we double them to get */
+ /* an offset in bytes. */
+ tmpOffset = tmpOffset * 2;
+ tmpSize = tmpSize * 2;
+
+ /* Write the answer into the working arrays on the SHPHandle */
+ psSHP->panRecOffset[shxBufferPage * SHX_BUFFER_PAGE + i] = tmpOffset;
+ psSHP->panRecSize[shxBufferPage * SHX_BUFFER_PAGE + i] = tmpSize;
+ }
+
+ msSetBit(psSHP->panRecLoaded, shxBufferPage, 1);
+
+ return(MS_SUCCESS);
+}
+
+/*
+** msSHXLoadAll()
+**
+** Read every SHX record into the in-memory panRecOffset / panRecSize
+** arrays in a single pass and flag the cache as complete by setting
+** panRecAllLoaded.
+**
+** Returns MS_SUCCESS when the cache is fully populated, MS_FAILURE when
+** the read buffer cannot be allocated or the index cannot be read
+** completely. On failure panRecAllLoaded is left untouched.
+*/
+int msSHXLoadAll( SHPHandle psSHP ) {
+  uchar *pabyBuf;
+  int i;
+
+  /* An empty index has nothing to read; the (empty) cache is complete. */
+  if( psSHP->nRecords <= 0 ) {
+    psSHP->panRecAllLoaded = 1;
+    return(MS_SUCCESS);
+  }
+
+  pabyBuf = (uchar *) malloc( (size_t) 8 * (size_t) psSHP->nRecords );
+  if( pabyBuf == NULL )
+    return(MS_FAILURE);
+
+  /* The records start right after the 100 byte SHX header. Seek there */
+  /* explicitly: earlier page loads leave the file position elsewhere. */
+  if( fseek( psSHP->fpSHX, 100L, SEEK_SET ) != 0 ||
+      fread( pabyBuf, 8, psSHP->nRecords, psSHP->fpSHX ) != (size_t) psSHP->nRecords ) {
+    free(pabyBuf);
+    return(MS_FAILURE);
+  }
+
+  for( i = 0; i < psSHP->nRecords; i++ ) {
+    ms_int32 nOffset, nLength;
+
+    memcpy( &nOffset, pabyBuf + i * 8, 4 );
+    if( !bBigEndian ) SwapWord( 4, &nOffset );
+
+    memcpy( &nLength, pabyBuf + i * 8 + 4, 4 );
+    if( !bBigEndian ) SwapWord( 4, &nLength );
+
+    /* SHX stores values in 2 byte units; keep them in bytes. */
+    psSHP->panRecOffset[i] = nOffset*2;
+    psSHP->panRecSize[i] = nLength*2;
+  }
+  free(pabyBuf);
+
+  /* Remember that the cache is complete so callers do not reload it. */
+  psSHP->panRecAllLoaded = 1;
+
+  return(MS_SUCCESS);
+}
+
+/*
+** msSHXReadOffset()
+**
+** Return the cached byte offset of record hEntity in the SHP file,
+** loading the SHX page that holds it first when it is not yet in memory.
+**
+** Returns MS_FAILURE when hEntity is out of range or when the page that
+** holds it cannot be loaded.
+*/
+int msSHXReadOffset( SHPHandle psSHP, int hEntity ) {
+  int shxBufferPage;
+
+  /* Validate the record/entity number. */
+  if( hEntity < 0 || hEntity >= psSHP->nRecords )
+    return(MS_FAILURE);
+
+  shxBufferPage = hEntity / SHX_BUFFER_PAGE;
+
+  /* Go to disk only when neither the whole index nor this page is cached. */
+  if( ! psSHP->panRecAllLoaded && ! msGetBit(psSHP->panRecLoaded, shxBufferPage) ) {
+    /* Do not hand back unpopulated cache contents if the load fails. */
+    if( msSHXLoadPage( psSHP, shxBufferPage ) != MS_SUCCESS )
+      return(MS_FAILURE);
+  }
+
+  return psSHP->panRecOffset[hEntity];
+}
+
+/*
+** msSHXReadSize()
+**
+** Return the cached content length, in bytes, of record hEntity as
+** recorded in the SHX, loading the SHX page that holds it first when it
+** is not yet in memory.
+**
+** Returns MS_FAILURE when hEntity is out of range or when the page that
+** holds it cannot be loaded.
+*/
+int msSHXReadSize( SHPHandle psSHP, int hEntity ) {
+  int shxBufferPage;
+
+  /* Validate the record/entity number. */
+  if( hEntity < 0 || hEntity >= psSHP->nRecords )
+    return(MS_FAILURE);
+
+  shxBufferPage = hEntity / SHX_BUFFER_PAGE;
+
+  /* Go to disk only when neither the whole index nor this page is cached. */
+  if( ! psSHP->panRecAllLoaded && ! msGetBit(psSHP->panRecLoaded, shxBufferPage) ) {
+    /* Do not hand back unpopulated cache contents if the load fails. */
+    if( msSHXLoadPage( psSHP, shxBufferPage ) != MS_SUCCESS )
+      return(MS_FAILURE);
+  }
+
+  return psSHP->panRecSize[hEntity];
+}
+
+
+
+/*
** msSHPReadShape() - Reads the vertices for one shape from a shape file.
*/
void msSHPReadShape( SHPHandle psSHP, int hEntity, shapeObj *shape )
@@ -1063,12 +1175,12 @@
if( hEntity < 0 || hEntity >= psSHP->nRecords )
return;
- if( psSHP->panRecSize[hEntity] == 4 ) {
+ if( msSHXReadSize(psSHP, hEntity) == 4 ) {
shape->type = MS_SHAPE_NULL;
return;
}
- nEntitySize = psSHP->panRecSize[hEntity]+8;
+ nEntitySize = msSHXReadSize(psSHP, hEntity) + 8;
if (msSHPReadAllocateBuffer(psSHP, hEntity, "msSHPReadShape()") == MS_FAILURE)
{
shape->type = MS_SHAPE_NULL;
@@ -1078,7 +1190,7 @@
/* -------------------------------------------------------------------- */
/* Read the record. */
/* -------------------------------------------------------------------- */
- fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity], 0 );
+ fseek( psSHP->fpSHP, msSHXReadOffset(psSHP, hEntity), 0 );
fread( psSHP->pabyRec, nEntitySize, 1, psSHP->fpSHP );
/* -------------------------------------------------------------------- */
@@ -1264,8 +1376,8 @@
if (nEntitySize < 44 + 4)
{
shape->type = MS_SHAPE_NULL;
- msSetError(MS_SHPERR, "Corrupted feature encountered. psSHP->panRecSize[%d]=%d", "msSHPReadShape()",
- hEntity, psSHP->panRecSize[hEntity]);
+ msSetError(MS_SHPERR, "Corrupted feature encountered. recSize of feature %d=%d", "msSHPReadShape()",
+ hEntity, msSHXReadSize(psSHP, hEntity));
return;
}
@@ -1371,8 +1483,8 @@
if (nEntitySize < 20 + 8)
{
shape->type = MS_SHAPE_NULL;
- msSetError(MS_SHPERR, "Corrupted feature encountered. psSHP->panRecSize[%d]=%d", "msSHPReadShape()",
- hEntity, psSHP->panRecSize[hEntity]);
+ msSetError(MS_SHPERR, "Corrupted feature encountered. recSize of feature %d=%d", "msSHPReadShape()",
+ hEntity, msSHXReadSize(psSHP, hEntity));
return;
}
@@ -1453,13 +1565,14 @@
padBounds->maxx = psSHP->adBoundsMax[0];
padBounds->maxy = psSHP->adBoundsMax[1];
} else {
- if( psSHP->panRecSize[hEntity] == 4 ) { /* NULL shape */
+
+ if( msSHXReadSize(psSHP, hEntity) == 4 ) { /* NULL shape */
padBounds->minx = padBounds->miny = padBounds->maxx = padBounds->maxy = 0.0;
return MS_FAILURE;
}
if( psSHP->nShapeType != SHP_POINT && psSHP->nShapeType != SHP_POINTZ && psSHP->nShapeType != SHP_POINTM) {
- fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity]+12, 0 );
+ fseek( psSHP->fpSHP, msSHXReadOffset(psSHP, hEntity) + 12, 0 );
fread( padBounds, sizeof(double)*4, 1, psSHP->fpSHP );
if( bBigEndian ) {
@@ -1479,7 +1592,7 @@
/* minimum and maximum bound. */
/* -------------------------------------------------------------------- */
- fseek( psSHP->fpSHP, psSHP->panRecOffset[hEntity]+12, 0 );
+ fseek( psSHP->fpSHP, msSHXReadOffset(psSHP, hEntity) + 12, 0 );
fread( padBounds, sizeof(double)*2, 1, psSHP->fpSHP );
if( bBigEndian ) {
@@ -1632,8 +1745,9 @@
shpfile->status = msSearchDiskTree(filename, rect, debug);
free(filename);
- if(shpfile->status) /* index */
+ if(shpfile->status) { /* index */
msFilterTreeSearch(shpfile, shpfile->status, rect);
+ }
else { /* no index */
shpfile->status = msAllocBitArray(shpfile->numshapes);
if(!shpfile->status) {
@@ -2303,18 +2417,15 @@
/* now apply the maxshapes criteria (NOTE: this ignores the filter so you could get less than maxfeatures) */
if(layer->maxfeatures > 0) {
- for(i=0; i<shpfile->numshapes; i++) {
- n1 += msGetBit(shpfile->status,i);
- }
- if(n1 > layer->maxfeatures) {
- for(i=0; i<shpfile->numshapes; i++) {
- if(msGetBit(shpfile->status,i) && (n2 < (n1 - layer->maxfeatures))) {
- msSetBit(shpfile->status,i,0);
- n2++;
- }
+ for( i = (shpfile->numshapes - 1); i >= 0; i-- ) {
+ n2 = msGetBit(shpfile->status, i);
+ n1 += n2;
+ if( n2 && n1 > layer->maxfeatures ) {
+ msSetBit(shpfile->status, i, 0);
}
}
+
}
return MS_SUCCESS;
@@ -2331,14 +2442,14 @@
if(!shpfile) {
msSetError(MS_SHPERR, "Shapefile layer has not been opened.", "msLayerNextShape()");
return MS_FAILURE;
- }
+ }
+
+ do {
+ i = msGetNextBit(shpfile->status, shpfile->lastshape + 1, shpfile->numshapes);
- do {
- i = shpfile->lastshape + 1;
- while(i<shpfile->numshapes && !msGetBit(shpfile->status,i)) i++; /* next "in" shape */
shpfile->lastshape = i;
- if(i == shpfile->numshapes) return(MS_DONE); /* nothing else to read */
+ if(i == -1) return(MS_DONE); /* nothing else to read */
filter_passed = MS_TRUE; /* By default accept ANY shape */
if(layer->numitems > 0 && layer->iteminfo) {
Modified: trunk/mapserver/mapshape.h
===================================================================
--- trunk/mapserver/mapshape.h 2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/mapshape.h 2008-04-25 21:32:01 UTC (rev 7528)
@@ -37,6 +37,8 @@
extern "C" {
#endif
+#define SHX_BUFFER_PAGE 1024
+
#ifndef SWIG
#define MS_PATH_LENGTH 1024
@@ -84,8 +86,11 @@
int nRecords;
int nMaxRecords;
+
int *panRecOffset;
int *panRecSize;
+ char *panRecLoaded;
+ int panRecAllLoaded;
double adBoundsMin[4];
double adBoundsMax[4];
@@ -101,6 +106,8 @@
typedef SHPInfo * SHPHandle;
#endif
+
+
typedef struct
{
#ifdef SWIG
@@ -175,7 +182,6 @@
int tilelayerindex;
} msTiledSHPLayerInfo;
-
/* shapefileObj function prototypes */
MS_DLL_EXPORT int msShapefileOpen(shapefileObj *shpfile, char *mode, char *filename);
MS_DLL_EXPORT int msShapefileCreate(shapefileObj *shpfile, char *filename, int type);
@@ -192,7 +198,13 @@
MS_DLL_EXPORT int msSHPReadPoint(SHPHandle psSHP, int hEntity, pointObj *point );
MS_DLL_EXPORT int msSHPWriteShape( SHPHandle psSHP, shapeObj *shape );
MS_DLL_EXPORT int msSHPWritePoint(SHPHandle psSHP, pointObj *point );
+/* SHX reading */
+MS_DLL_EXPORT int msSHXLoadAll( SHPHandle psSHP );
+MS_DLL_EXPORT int msSHXLoadPage( SHPHandle psSHP, int shxBufferPage );
+MS_DLL_EXPORT int msSHXReadOffset( SHPHandle psSHP, int hEntity );
+MS_DLL_EXPORT int msSHXReadSize( SHPHandle psSHP, int hEntity );
+
/* tiledShapefileObj function prototypes are in mapserver.h */
/* XBase function prototypes */
Modified: trunk/mapserver/maptree.c
===================================================================
--- trunk/mapserver/maptree.c 2008-04-25 21:05:12 UTC (rev 7527)
+++ trunk/mapserver/maptree.c 2008-04-25 21:32:01 UTC (rev 7528)
@@ -1,5 +1,5 @@
/******************************************************************************
- * $Id:$
+ * $Id$
*
* Project: MapServer
* Purpose: .qix spatial index implementation. Derived from shapelib, and
@@ -768,10 +768,14 @@
int i;
rectObj shape_rect;
- for(i=0;i<shp->numshapes;i++) { /* for each shape */
- if(msGetBit(status, i)) {
- if(msSHPReadBounds(shp->hSHP, i, &shape_rect) == MS_SUCCESS)
- if(msRectOverlap(&shape_rect, &search_rect) != MS_TRUE) msSetBit(status, i, 0);
+ i = msGetNextBit(status, 0, shp->numshapes);
+ while(i >= 0) {
+ if(msSHPReadBounds(shp->hSHP, i, &shape_rect) == MS_SUCCESS) {
+ if(msRectOverlap(&shape_rect, &search_rect) != MS_TRUE) {
+ msSetBit(status, i, 0);
+ }
}
+ i = msGetNextBit(status, i+1, shp->numshapes);
}
+
}
More information about the mapserver-commits
mailing list