[fdo-commits] r171 - in branches/3.2.x/Providers/SHP/Src: Message Provider ShpRead
svn_fdo at osgeo.org
svn_fdo at osgeo.org
Wed Jan 31 17:25:22 EST 2007
Author: danstoica
Date: 2007-01-31 17:25:22 -0500 (Wed, 31 Jan 2007)
New Revision: 171
Modified:
branches/3.2.x/Providers/SHP/Src/Message/ShpMessage.mc
branches/3.2.x/Providers/SHP/Src/Provider/ShpFeatIdQueryEvaluator.cpp
branches/3.2.x/Providers/SHP/Src/Provider/ShpQueryOptimizer.cpp
branches/3.2.x/Providers/SHP/Src/Provider/ShpReader.h
branches/3.2.x/Providers/SHP/Src/ShpRead/PolyShape.cpp
branches/3.2.x/Providers/SHP/Src/ShpRead/PolylineShape.cpp
branches/3.2.x/Providers/SHP/Src/ShpRead/RowData.cpp
branches/3.2.x/Providers/SHP/Src/ShpRead/Shape.cpp
branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.cpp
branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.h
Log:
SHP performance improvements
Modified: branches/3.2.x/Providers/SHP/Src/Message/ShpMessage.mc
===================================================================
--- branches/3.2.x/Providers/SHP/Src/Message/ShpMessage.mc 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/Message/ShpMessage.mc 2007-01-31 22:25:22 UTC (rev 171)
@@ -703,6 +703,11 @@
Language=English
Geometry creation failed for '%1$ls'.
.
+MessageId=13002
+SymbolicName=SHP_READ_GEOMETRY_FAILED
+Language=English
+Found corrupted '%1$ls' geometry.
+.
;//************************************************************************************
;//************************************************************************************
Modified: branches/3.2.x/Providers/SHP/Src/Provider/ShpFeatIdQueryEvaluator.cpp
===================================================================
--- branches/3.2.x/Providers/SHP/Src/Provider/ShpFeatIdQueryEvaluator.cpp 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/Provider/ShpFeatIdQueryEvaluator.cpp 2007-01-31 22:25:22 UTC (rev 171)
@@ -299,10 +299,11 @@
results->searchArea.yMax = searchArea.yMax + xyRes;
bool done = false;
+ BoundingBoxEx extents;
+
do
{
unsigned long offset;
- BoundingBoxEx extents;
int nStatus = m_RTree->GetNextObject (offset, extents);
switch (nStatus)
@@ -410,9 +411,12 @@
if (right == NULL)
return right;
- std::sort(left->begin(), left->end(), std::less<FdoInt32>());
- std::sort(right->begin(), right->end(), std::less<FdoInt32>());
+ if ( left->size() > 0 )
+ std::sort(left->begin(), left->end(), std::less<FdoInt32>());
+ if ( right->size() > 0 )
+ std::sort(right->begin(), right->end(), std::less<FdoInt32>());
+
recno_list::iterator iter1 = left->begin();
recno_list::iterator iter2 = right->begin();
Modified: branches/3.2.x/Providers/SHP/Src/Provider/ShpQueryOptimizer.cpp
===================================================================
--- branches/3.2.x/Providers/SHP/Src/Provider/ShpQueryOptimizer.cpp 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/Provider/ShpQueryOptimizer.cpp 2007-01-31 22:25:22 UTC (rev 171)
@@ -132,11 +132,12 @@
int nStatus;
size_t filterNum;
bool filter_found = false;
+ BoundingBox box;
// Try to reuse a list of candidates.
for ( filterNum = 0; filterNum < m_FeatidLists.size() && !filter_found; filterNum++ )
{
- BoundingBox box = m_FeatidLists[filterNum]->searchArea;
+ box = m_FeatidLists[filterNum]->searchArea;
filter_found = ( AreEqual( box.xMin, searchArea.xMin ) &&
AreEqual( box.yMin, searchArea.yMin ) &&
@@ -166,11 +167,11 @@
results->searchArea.yMax = searchArea.yMax;
bool done = false;
+ BoundingBoxEx extents;
do
{
unsigned long offset;
- BoundingBoxEx extents;
-
+
nStatus = m_RTree->GetNextObject (offset, extents);
switch (nStatus)
{
Modified: branches/3.2.x/Providers/SHP/Src/Provider/ShpReader.h
===================================================================
--- branches/3.2.x/Providers/SHP/Src/Provider/ShpReader.h 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/Provider/ShpReader.h 2007-01-31 22:25:22 UTC (rev 171)
@@ -30,7 +30,7 @@
// Maximum allowed size for the merged list of FeatIds
// produced by the ShpFeatIdQueryEvaluator optimizer
-#define SHP_MAX_FEATID_LIST_SIZE 10000
+#define SHP_MAX_FEATID_LIST_SIZE 50000
#define SHP_CACHED_GEOMETRY_INITIAL_SIZE 100 // bytes
@@ -39,6 +39,16 @@
class ShpFeatIdQueryTester;
class ShpFeatIdQueryEvaluator;
+#define SHP_CACHE_PROP_DATA 0
+#define SHP_CACHE_PROP_ID 1
+#define SHP_CACHE_PROP_GEOM 2
+
+typedef struct {
+ int colIndex;
+ FdoStringP propertyName;
+ int propertyType; // one of SHP_CACHE_PROP_*
+} ShpPropertyInfoDef;
+
template <class FDO_READER> class ShpReader :
public FDO_READER
{
@@ -64,6 +74,7 @@
RowData* mData; // dbf record for current shape
bool mCheckSelected; // if true, check access is only to selected properties, i.e. client call
FdoStringP mClassName;
+ FdoStringP mCodePage; // Code page
FdoStringP mLogicalIdentityPropertyName;
FdoStringP mLogicalGeometryPropertyName;
int mMaxNumObjects;
@@ -77,6 +88,11 @@
bool mFetchGeometry; // ExtendedSelect doesn't need geometries for indexing data.
bool mFetchDeletes; // ExtendedSelect needs fetching the deleted rows to use the featid as index.
+ // A cache of property definition names to avoid expensive string conversions
+ ShpPropertyInfoDef *mPropertyInfoDefs;
+ int mNumPropertyInfoDefs;
+ int mLastPropertyInfoDef;
+
protected:
ShpReader () {}; // to satisfy _NoAddRefReleaseOnFdoPtr
@@ -133,12 +149,27 @@
mSelected = FDO_SAFE_ADDREF (selected);
mFilterExecutor = ShpQueryOptimizer::Create (this, selected );
+
+ // Get the code page from LDID. If not valid try the .CPG file.
+ mCodePage = mFileSet->GetDbfFile()->GetCodePage();
+
+ if (mCodePage == L"" && mFileSet->GetCpgFile())
+ mCodePage = mFileSet->GetCpgFile()->GetCodePage();
+
+ // Initialize the cache
+ mPropertyInfoDefs = NULL;
+ mNumPropertyInfoDefs = 0;
+ mLastPropertyInfoDef = 0;
}
virtual ~ShpReader (void)
{
if ( mData )
delete mData;
+
+ if ( mPropertyInfoDefs )
+ delete [] mPropertyInfoDefs;
+
Close ();
}
@@ -159,34 +190,23 @@
/// <returns>Returns nothing.</returns>
void GetData (ColumnData* data, FdoString* propertyName, eDBFColumnType column_type, FdoString* type_name)
{
- FdoString* columnName;
- ColumnInfo* info;
- int count;
- eDBFColumnType type;
+ ColumnInfo* info = mData->GetColumnInfo ();
+ eDBFColumnType type;
+ int cacheIndex;
- columnName = ShpSchemaUtilities::GetPhysicalColumnName (mConnection, mClassName, propertyName);
- info = mData->GetColumnInfo ();
- count = info->GetNumColumns ();
- type = kColumnUnsupportedType;
- for (int i = 0; i < count; i++)
- {
- if (0 == wcscmp (columnName, info->GetColumnNameAt (i)))
- {
- type = info->GetColumnTypeAt (i);
- if (type == column_type)
- {
- // Get the code page from LDID. If not valid try the .CPG file.
- FdoStringP codePage = mFileSet->GetDbfFile()->GetCodePage();
+ bool found = Property2ColName( propertyName, &cacheIndex );
- if (codePage == L"" && mFileSet->GetCpgFile())
- codePage = mFileSet->GetCpgFile()->GetCodePage();
+ if ( found )
+ {
+ int i = mPropertyInfoDefs[cacheIndex].colIndex;
- mData->GetData (data, i, type, (WCHAR*)(FdoString *)codePage);
- }
- else
- throw FdoException::Create (NlsMsgGet(SHP_VALUE_TYPE_MISMATCH, "Value type (%1$ls) to insert, update or retrieve doesn't match the type (%2$ls) of property '%3$ls'.", type_name, ColumnTypeToString (type), propertyName));
- break;
- }
+ type = info->GetColumnTypeAt (i);
+ if (type == column_type)
+ {
+ mData->GetData (data, i, type, (WCHAR*)(FdoString *)mCodePage);
+ }
+ else
+ throw FdoException::Create (NlsMsgGet(SHP_VALUE_TYPE_MISMATCH, "Value type (%1$ls) to insert, update or retrieve doesn't match the type (%2$ls) of property '%3$ls'.", type_name, ColumnTypeToString (type), propertyName));
}
if (kColumnUnsupportedType == type)
throw FdoException::Create (NlsMsgGet(SHP_COMMAND_PROPERTY_NOT_FOUND, "Property '%1$ls' is not part of class '%2$ls'.", propertyName, (FdoString*)mClassName));
@@ -497,32 +517,33 @@
{
info = mData->GetColumnInfo ();
count = info->GetNumColumns ();
- // if it's the singleton identity property, it's always non-null:
- if (0 == wcscmp (propertyName, mLogicalIdentityPropertyName))
- ret = false;
- // if it's the singleton geometry property, it's null if the shape is NullShape:
- else if (0 == wcscmp (propertyName, mLogicalGeometryPropertyName))
- ret = (eNullShape == mShape->GetShapeType ());
- // if it's anything else, need to explicitly check:
- else
- {
- FdoString* columnName = ShpSchemaUtilities::GetPhysicalColumnName(mConnection, mClassName, propertyName);
- ret = true;
- for (int i = 0; ret && (i < count); i++)
- {
- if (0 == wcscmp (columnName, info->GetColumnNameAt (i)))
- {
- ColumnData coldata;
- GetData (&coldata, propertyName, info->GetColumnTypeAt(i), ColumnTypeToString(info->GetColumnTypeAt(i)));
- ret = coldata.bIsNull;
- break;
- }
- // If property not found, throw exception:
- if (i == count-1)
- throw FdoException::Create(NlsMsgGet(SHP_COMMAND_PROPERTY_NOT_FOUND, "Property '%1$ls' is not part of class '%2$ls'.", propertyName, (FdoString*)mClassName));
- }
+ int cacheIndex;
+ bool found = Property2ColName( propertyName, &cacheIndex );
+
+ if ( found )
+ {
+ int type = mPropertyInfoDefs[cacheIndex].propertyType;
+
+ // if it's the singleton identity property, it's always non-null:
+ if ( type == SHP_CACHE_PROP_ID )
+ ret = false;
+ // if it's the singleton geometry property, it's null if the shape is NullShape:
+ else if ( type == SHP_CACHE_PROP_GEOM )
+ ret = (eNullShape == mShape->GetShapeType ());
+ // if it's anything else, need to explicitly check:
+ else
+ {
+ int i = mPropertyInfoDefs[cacheIndex].colIndex;
+ eDBFColumnType type = info->GetColumnTypeAt (i);
+ ColumnData coldata;
+
+ mData->GetData (&coldata, i, type);
+ ret = coldata.bIsNull;
+ }
}
+ else
+ throw FdoException::Create(NlsMsgGet(SHP_COMMAND_PROPERTY_NOT_FOUND, "Property '%1$ls' is not part of class '%2$ls'.", propertyName, (FdoString*)mClassName));
}
return (ret);
@@ -1318,6 +1339,103 @@
return numOrd;
}
+ /////////////////////////////////////////////////////////////////////////////////
+ bool Property2ColName( const wchar_t *propName, int *index )
+ {
+ bool found = false;
+ const char* string = NULL;
+ FdoStringP colName;
+ ShpPropertyInfoDef *cacheElem = NULL;
+ bool found2 = false;
+ int cacheIndex;
+
+ *index = -1;
+
+ // Allocate the cache
+ if ( mPropertyInfoDefs == NULL )
+ {
+ ColumnInfo* info = mData->GetColumnInfo ();
+ int count = info->GetNumColumns ();
+
+ // Allocate a slot for each column + for "FeatId" and "Geometry"
+ mPropertyInfoDefs = new ShpPropertyInfoDef[ info->GetNumColumns() + 2 ];
+ mNumPropertyInfoDefs = 0;
+ }
+
+ // Optimize the linear search in the cache:
+ // Chances are it's the A) next property (usually fetched in order)
+ // or B) the current property (after IsNull)
+ for ( int i = mLastPropertyInfoDef; !found2 && i < mNumPropertyInfoDefs; i++ )
+ {
+ cacheElem = &mPropertyInfoDefs[i];
+
+ found2 = ( wcscmp( propName, cacheElem->propertyName ) == 0);
+ cacheIndex = i;
+ }
+
+ for ( int i = 0; !found2 && i < mLastPropertyInfoDef; i++ )
+ {
+ cacheElem = &mPropertyInfoDefs[i];
+
+ found2 = ( wcscmp( propName, cacheElem->propertyName ) == 0);
+ cacheIndex = i;
+ }
+
+ // Fast return if property found in the cache
+ if ( found2 )
+ {
+ found = true;
+ *index = cacheIndex;
+
+ mLastPropertyInfoDef = cacheIndex;
+
+ return found;
+ }
+
+ // Initialize a new slot in the cache. "FeatId" and "Geometry" are special cases
+ int type = SHP_CACHE_PROP_DATA;
+ if (0 == wcscmp (propName, mLogicalIdentityPropertyName))
+ type = SHP_CACHE_PROP_ID;
+ else if (0 == wcscmp (propName, mLogicalGeometryPropertyName))
+ type = SHP_CACHE_PROP_GEOM;
+
+ ShpPropertyInfoDef *elem = &mPropertyInfoDefs[mNumPropertyInfoDefs];
+
+ //wcscpy( elem->propertyName, propName );
+ elem->propertyName = propName;
+ elem->propertyType = type;
+ elem->colIndex = -1;
+
+ // Search for the position of this column
+ if ( type == SHP_CACHE_PROP_DATA )
+ {
+ FdoString* columnName = ShpSchemaUtilities::GetPhysicalColumnName (mConnection, mClassName, propName);
+ ColumnInfo* info = mData->GetColumnInfo ();
+ int count = info->GetNumColumns ();
+
+ for (int i = 0; i < count && !found; i++)
+ {
+ found = (0 == wcscmp (columnName, info->GetColumnNameAt (i)));
+ if ( found )
+ elem->colIndex = i; // position in the array of columns
+ }
+ }
+ else
+ found = true;
+
+ // Store the column index
+ if ( found )
+ {
+ cacheIndex = mNumPropertyInfoDefs;
+ *index = cacheIndex; // Position in this cache
+
+ // Remember this
+ mLastPropertyInfoDef = mNumPropertyInfoDefs;
+ mNumPropertyInfoDefs++;
+ }
+
+ return found;
+ }
};
#endif // SHPREADER_H
Modified: branches/3.2.x/Providers/SHP/Src/ShpRead/PolyShape.cpp
===================================================================
--- branches/3.2.x/Providers/SHP/Src/ShpRead/PolyShape.cpp 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/ShpRead/PolyShape.cpp 2007-01-31 22:25:22 UTC (rev 171)
@@ -23,8 +23,10 @@
PolyShape::PolyShape (int nRecordNumber, void* pMemory, bool bOverlay, int* pParts, DoublePoint* pPoints) :
Shape (nRecordNumber, pMemory, bOverlay, pPoints)
{
- _ASSERT(NULL != pParts);
- mParts = pParts;
+ if ( NULL == pParts )
+ throw FdoException::Create (NlsMsgGet(SHP_READ_GEOMETRY_FAILED, "Found corrupted '%1$ls' geometry.", L"PolyShape"));
+
+ mParts = pParts;
}
PolyShape::~PolyShape ()
Modified: branches/3.2.x/Providers/SHP/Src/ShpRead/PolylineShape.cpp
===================================================================
--- branches/3.2.x/Providers/SHP/Src/ShpRead/PolylineShape.cpp 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/ShpRead/PolylineShape.cpp 2007-01-31 22:25:22 UTC (rev 171)
@@ -52,8 +52,8 @@
}
else
{
- _ASSERT(GetNumParts () >= 1);
- _ASSERT(GetNumPoints () >= 1);
+ if ( (GetNumParts () == 0) || (GetNumPoints () == 0) )
+ throw FdoException::Create (NlsMsgGet(SHP_READ_GEOMETRY_FAILED, "Found corrupted '%1$ls' geometry.", L"PolylineShape"));
}
}
Modified: branches/3.2.x/Providers/SHP/Src/ShpRead/RowData.cpp
===================================================================
--- branches/3.2.x/Providers/SHP/Src/ShpRead/RowData.cpp 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/ShpRead/RowData.cpp 2007-01-31 22:25:22 UTC (rev 171)
@@ -403,7 +403,7 @@
{
ULONG sysCpg = CP_THREAD_ACP;
- if ( codepage )
+ if ( codepage && (wcslen( codepage) != 0))
{
FdoStringP sCpg = FdoStringP( codepage );
Modified: branches/3.2.x/Providers/SHP/Src/ShpRead/Shape.cpp
===================================================================
--- branches/3.2.x/Providers/SHP/Src/ShpRead/Shape.cpp 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/ShpRead/Shape.cpp 2007-01-31 22:25:22 UTC (rev 171)
@@ -26,7 +26,7 @@
mOverlay (bOverlay),
mPoints (pPoints)
{
- _ASSERT(NULL != pMemory);
+ //_ASSERT(NULL != pMemory);
// _ASSERT(NULL != pPoints); NullShape does this
}
Modified: branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.cpp
===================================================================
--- branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.cpp 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.cpp 2007-01-31 22:25:22 UTC (rev 171)
@@ -51,9 +51,12 @@
m_szRowBuffer = NULL;
m_nRowBufferSize = 0;
+ m_nLastUsedCacheIndex = 0;
+
ClearRowShapeCache();
m_nCacheHits = 0;
+ m_nCacheHits2 = 0;
m_nCacheMisses = 0;
}
@@ -96,8 +99,8 @@
if ( SHP_PRINT_STATS )
{
double total = m_nCacheHits + m_nCacheMisses;
- printf(" [Records requests %d: hits %d (%d%%) misses %d (%d%%)]\n",
- (int)total, m_nCacheHits, (int)(m_nCacheHits* 100/total), m_nCacheMisses, (int)(m_nCacheMisses*100/total));
+ printf(" [Records requests %d: hits %d (%d%%) hits2 %d (%d%%) (misses %d (%d%%)]\n",
+ (int)total, m_nCacheHits, (int)(m_nCacheHits/total*100), m_nCacheHits2, (int)(m_nCacheHits2/total*100), m_nCacheMisses, (int)(m_nCacheMisses/total*100));
}
}
@@ -1424,24 +1427,44 @@
BYTE *pShapeRecord = NULL;
int rowOffset = 0;
- for ( int i = 0; i < SHP_FILE_READ_CACHE_SIZE && pShapeRecord == NULL; i++ )
+ // Try to find starting from the current cache slot
+ pShapeRecord = GetRowShapeFromCache2( nOffset, m_nLastUsedCacheIndex, SHP_FILE_READ_CACHE_SIZE, nRecordNumber);
+
+ // Not found. Try from the beginning of the cache.
+ if ( pShapeRecord == NULL )
+ {
+ pShapeRecord = GetRowShapeFromCache2( nOffset, 0, m_nLastUsedCacheIndex-1, nRecordNumber);
+ m_nCacheHits2++;
+ }
+
+ if ( pShapeRecord != NULL )
+ m_nCacheHits++;
+ else
+ m_nCacheMisses++;
+
+ return pShapeRecord;
+}
+
+BYTE *ShapeFile::GetRowShapeFromCache2(ULONG nOffset, int nStartCacheIndex, int nEndCacheIndex, int& nRecordNumber)
+{
+ BYTE *pShapeRecord = NULL;
+
+ for ( int i = nStartCacheIndex; i < nEndCacheIndex && pShapeRecord == NULL; i++ )
{
if (!m_ReadRecordsBuffer[i].bOffsetValid)
+ {
break;
+ }
if ( m_ReadRecordsBuffer[i].nOffset == nOffset )
{
pShapeRecord = &m_szRowBuffer[nOffset - m_ReadRecordsBuffer[0].nOffset + sizeof(SHPRecordHeader)];
nRecordNumber = m_ReadRecordsBuffer[i].nRecordNumber;
+ m_nLastUsedCacheIndex = i;
}
}
- if ( pShapeRecord != NULL )
- m_nCacheHits++;
- else
- m_nCacheMisses++;
-
- return pShapeRecord;
+ return pShapeRecord;
}
/*****************************************************************************/
@@ -1452,5 +1475,6 @@
{
m_ReadRecordsBuffer[i].bOffsetValid = false;
}
+ m_nLastUsedCacheIndex = 0;
}
Modified: branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.h
===================================================================
--- branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.h 2007-01-29 19:57:14 UTC (rev 170)
+++ branches/3.2.x/Providers/SHP/Src/ShpRead/ShapeFile.h 2007-01-31 22:25:22 UTC (rev 171)
@@ -71,15 +71,19 @@
BYTE* m_szRowBuffer;
size_t m_nRowBufferSize;
SHPRecordInfo m_ReadRecordsBuffer[SHP_FILE_READ_CACHE_SIZE];
+ ULONG m_nLastUsedCacheIndex;
// Statistics
int m_nCacheHits;
+ int m_nCacheHits2;
int m_nCacheMisses;
private:
void ReadRecordInfo(SHPRecordInfo *pRecordInfo);
void ReadRawDataBlock(ULONG ulStartOffset );
BYTE* GetRowShapeFromCache(ULONG ulOffset, int& nRecordNumber);
+ BYTE* GetRowShapeFromCache2(ULONG nOffset, int nStartCacheIndex, int nEndCacheIndex, int& nRecordNumber);
+
void ClearRowShapeCache();
};
More information about the fdo-commits mailing list