[postgis-users] Re: Speeding up SQLiteGIS
Stephen Woodbridge
woodbri at swoodbridge.com
Sun Apr 13 20:02:23 PDT 2008
OK, this is cool and evolving very quickly. So if I just want to write a
C program using sqlite and GIS, where do I start.
Do I need to bring in GDAL/OGR now?
Is there a C-api for that?
Does this mean that no one is pursuing just extended SQLite with GIS?
Sorry, I'm getting lost here.
Punkish, I think setting up a wiki somewhere and post some examples
would be very cool, and would be appreciated.
-Steve
Martin Chapman wrote:
> Punkish,
>
> I shared similar code last year for postgresql and mysql but it never made
> it into the branch.
>
> Here is the first change for the GetFeature() function of the sqllite
> driver. It is changed to use a new select statement (because no cursors in
> sqlite). I will pretty it up and send the final version later.
>
>
> 1. replace OGRSQLiteSelectLayer def in
> /GDAL/ogr/ogrsf_frmts/sqlite/ogr_sqlite.h
>
> /************************************************************************/
> /* OGRSQLiteSelectLayer */
> /************************************************************************/
>
> class OGRSQLiteSelectLayer : public OGRSQLiteLayer
> {
> CPLString m_sTableName;
> public:
> OGRSQLiteSelectLayer( OGRSQLiteDataSource *,
> sqlite3_stmt * );
> ~OGRSQLiteSelectLayer();
>
> virtual void ResetReading();
> virtual int GetFeatureCount( int );
>
> virtual OGRFeature *GetFeature( long nFeatureId );
>
> virtual int TestCapability( const char * );
>
> virtual void ClearStatement();
> virtual void SetTableName(const char* pszTableName);
> };
>
>
> 2. Add the following block of code to
> /GDAL/ogr/ogrsf_frmts/sqlite/ogrsqlitedatasource.cpp ExecuteSQL() method at
> the end of the function just before the line " return poLayer;";
>
> std_string sSQLStatement = pszSQLCommand;
> std_string sTableName;
> transform(sSQLStatement.begin(), sSQLStatement.end(),
> sSQLStatement.begin(), tolower);
> int n = (int) sSQLStatement.find("from");
> if (n != -1)
> {
> sTableName = sSQLStatement.substr(n, sSQLStatement.length()
> - n);
> replace(sTableName, std_string("from "), std_string(""));
> trim(sTableName, std_string(" "));
> trim(sTableName, std_string("'"));
>
> n = (int) sTableName.rfind("where");
> if (n != -1)
> sTableName = sTableName.substr(0, n);
>
> n = (int) sTableName.rfind("order by");
> if (n != -1)
> sTableName = sTableName.substr(0, n);
>
> n = (int) sTableName.rfind("'");
> if (n != -1)
> sTableName = sTableName.substr(0, n);
> else
> {
> n = (int) sTableName.find(" ");
> if (n != -1)
> sTableName = sTableName.substr(0, n);
> }
>
> trim(sTableName, string(" "));
> poLayer->SetTableName(sTableName.c_str());
> }
>
>
> 3. The following code replaces the GetFeature() method in
> GDAL/ogr/ogrsf_frmts/sqlite/ogrsqliteselectlayer.cpp file.
>
> /************************************************************************/
> /* GetFeature() */
> /************************************************************************/
>
> OGRFeature *OGRSQLiteSelectLayer::GetFeature( long nFeatureId )
> {
> iNextShapeId = nFeatureId;
>
> CPLString osCommand;
> osCommand.Printf("SELECT * FROM %s WHERE OGC_FID = '%d' Limit 1",
> m_sTableName.c_str(), iNextShapeId);
>
> sqlite3_stmt* hStmt = NULL;
> if (sqlite3_prepare(poDS->GetDB(), osCommand, (int)
> osCommand.size(), &hStmt, NULL ) != SQLITE_OK)
> return NULL;
>
> int rc = sqlite3_step( hStmt );
> if( rc != SQLITE_ROW )
> {
> ClearStatement();
> return NULL;
> }
>
> int iField;
> OGRFeature *poFeature = new OGRFeature( poFeatureDefn );
> if( pszFIDColumn != NULL )
> {
> int iFIDCol;
>
> for( iFIDCol = 0; iFIDCol < sqlite3_column_count(hStmt); iFIDCol++ )
> {
> if( EQUAL(sqlite3_column_name(hStmt,iFIDCol),
> pszFIDColumn) )
> break;
> }
>
> if( iFIDCol == sqlite3_column_count(hStmt) )
> {
> CPLError( CE_Failure, CPLE_AppDefined,
> "Unable to find FID column '%s'.",
> pszFIDColumn );
> return NULL;
> }
>
> poFeature->SetFID( sqlite3_column_int( hStmt, iFIDCol ) );
> }
> else
> poFeature->SetFID( iNextShapeId );
>
> iNextShapeId++;
> m_nFeaturesRead++;
>
> if( pszGeomColumn != NULL )
> {
> int iGeomCol;
>
> for( iGeomCol = 0; iGeomCol < sqlite3_column_count(hStmt);
> iGeomCol++ )
> {
> if( EQUAL(sqlite3_column_name(hStmt,iGeomCol),
> pszGeomColumn) )
> break;
> }
>
> if( iGeomCol == sqlite3_column_count(hStmt) )
> {
> CPLError( CE_Failure, CPLE_AppDefined,
> "Unable to find Geometry column '%s'.",
> pszGeomColumn );
> return NULL;
> }
>
> char *pszWKTCopy, *pszWKT = NULL;
> OGRGeometry *poGeometry = NULL;
>
> pszWKT = (char *) sqlite3_column_text( hStmt, iGeomCol );
> pszWKTCopy = pszWKT;
> if( OGRGeometryFactory::createFromWkt( &pszWKTCopy, NULL,
> &poGeometry ) == OGRERR_NONE
> )
> poFeature->SetGeometryDirectly( poGeometry );
> }
>
> for( iField = 0; iField < poFeatureDefn->GetFieldCount(); iField++ )
> {
> OGRFieldDefn *poFieldDefn = poFeatureDefn->GetFieldDefn( iField );
> int iRawField = panFieldOrdinals[iField] - 1;
>
> if( sqlite3_column_type( hStmt, iRawField ) == SQLITE_NULL )
> continue;
>
> switch( poFieldDefn->GetType() )
> {
> case OFTInteger:
> poFeature->SetField( iField,
> sqlite3_column_int( hStmt, iRawField ) );
> break;
>
> case OFTReal:
> poFeature->SetField( iField,
> sqlite3_column_double( hStmt, iRawField ) );
> break;
>
> case OFTBinary:
> {
> const int nBytes = sqlite3_column_bytes( hStmt, iRawField );
>
> poFeature->SetField( iField, nBytes,
> (GByte*)sqlite3_column_blob( hStmt, iRawField ) );
> }
> break;
>
> case OFTString:
> poFeature->SetField( iField,
> (const char *)
> sqlite3_column_text( hStmt, iRawField ) );
> break;
>
> default:
> break;
> }
> }
>
> #ifdef notdef
> if( pszGeomColumn != NULL )
> {
> int iField = poStmt->GetColId( pszGeomColumn );
> const char *pszGeomText = poStmt->GetColData( iField );
> OGRGeometry *poGeom = NULL;
>
> if( pszGeomText != NULL )
> OGRGeometryFactory::createFromWkt( (char **) &pszGeomText,
> NULL, &poGeom );
>
> if( poGeom != NULL )
> poFeature->SetGeometryDirectly( poGeom );
> }
> #endif
>
> if (hStmt)
> sqlite3_finalize(hStmt);
>
> return poFeature;
> }
>
>
> 4. Rebuild gdal. Note that this change only optimizes the GetFeature(long
> nFeatureId) method of a layer opened by a call to ExecuteSQL(). At least
> random access calls are fast then. I can also make the change in the
> OGRSQLiteLayer and ogrsqlitetablelayer classes. The function will always
> work for random access calls because WHERE, ORDER BY and all other sql
> clauses are irrelevant when accessing a row by ID. Note that
> GetNextFeature() will still be slow although we could speed up that function
> as well.
>
>
>
> ALSO here is the changes for MYSQL and POSTGRESQL for optimized random
> access.
>
> MYSQL:
>
> /************************************************************************/
> /* GetFeature() */
> /************************************************************************/
>
> OGRFeature *OGRMySQLResultLayer::GetFeature( long nFeatureId )
> {
> iNextShapeId = bHasFid == TRUE ? nFeatureId - 1 : nFeatureId;
>
> /* -------------------------------------------------------------------- */
> /* Do we need to establish an initial query? */
> /* -------------------------------------------------------------------- */
> if( hResultSet == NULL )
> {
> CPLAssert( pszQueryStatement != NULL );
>
> poDS->RequestLongResult( this );
>
> if( mysql_query( poDS->GetConn(), pszQueryStatement ) )
> {
> poDS->ReportError( pszQueryStatement );
> return NULL;
> }
>
> hResultSet = mysql_store_result( poDS->GetConn() );
> if( hResultSet == NULL )
> {
> poDS->ReportError( "mysql_use_result() failed on query." );
> return FALSE;
> }
> }
>
> /* -------------------------------------------------------------------- */
> /* Fetch absolute record.
> */
> /* -------------------------------------------------------------------- */
> char **papszRow = NULL;
> unsigned long *panLengths = NULL;
> OGRFeature *poFeature = NULL;
>
> if ( mysql_num_rows( hResultSet ) );
> {
> mysql_data_seek( hResultSet, iNextShapeId );
>
> papszRow = mysql_fetch_row( hResultSet );
> if( !papszRow ) return NULL;
>
> panLengths = mysql_fetch_lengths( hResultSet );
> if ( !panLengths ) return NULL;
>
> /* -------------------------------------------------------------------- */
> /* Process record. */
> /* -------------------------------------------------------------------- */
> poFeature = RecordToFeature( papszRow, panLengths );
> }
>
> return poFeature;
> }
>
>
> POSTGRESQL:
>
>
> /************************************************************************/
> /* GetFeature() */
> /************************************************************************/
>
> OGRFeature *OGRPGResultLayer::GetFeature( long nFeatureId )
>
> {
> if (bHasFid == TRUE && nFeatureId < 1) return NULL;
> PGconn *hPGConn = poDS->GetPGConn();
> CPLString osCommand;
> iNextShapeId = nFeatureId;
>
> if (hCursorResult != NULL)
> {
> PQclear( hCursorResult );
> poDS->FlushSoftTransaction();
> }
>
> poDS->SoftStartTransaction();
> osCommand.Printf( "DECLARE %s CURSOR for %s", pszCursorName,
> pszQueryStatement );
> hCursorResult = PQexec(hPGConn, osCommand );
>
> PQclear( hCursorResult );
> osCommand.Printf( "FETCH ABSOLUTE %d in %s", nFeatureId, pszCursorName
> );
> hCursorResult = PQexec(hPGConn, osCommand );
> OGRFeature *poFeature = RecordToFeature( 0 );
> return poFeature;
> }
>
>
> Best regards,
> Martin
>
>
> -----Original Message-----
> From: P Kishor [mailto:punk.kish at gmail.com]
> Sent: Sunday, April 13, 2008 6:56 PM
> To: Martin Chapman
> Cc: PostGIS Users Discussion; Richard Greenwood; Stephen Woodbridge
> Subject: Re: Speeding up SQLiteGIS
>
> On 4/13/08, Martin Chapman <mchapman at texelinc.com> wrote:
>> I'm working on it right now and will send you the code when I finish.
>> Basically SQLite is fast and the OGR code that uses sqlite is slow
> because
>> of the way they wrote it. My changes make OGR use random access versus
> what
>> they do today and scroll from 0 to n for each record requested. That
> means
>> that row one will return quick but for each sub-sequent request the code
>> loops until it finds the record requested. Very inefficient. It has
> speed
>> it up by about 50x on the very small shape file I am using to test.
>
> Fantastic! Please consider sharing the code with Frank. He is the
> ultimate custodian for OGR, and we want OGR itself to become better to
> the extent that it can.
>
> In the meantime, Frank, thanks yet again for making this possible (and
> for help with the OGR syntax).
>
> la la la (me happy again).
>
>
>>
>> Best regards,
>> Martin
>>
>> -----Original Message-----
>>
>> From: P Kishor [mailto:punk.kish at gmail.com]
>> Sent: Sunday, April 13, 2008 6:41 PM
>> To: PostGIS Users Discussion
>>
>> Cc: Richard Greenwood; Stephen Woodbridge; Martin Chapman
>> Subject: Speeding up SQLiteGIS
>>
>> (started a new thread)
>>
>>
>>
>> On 4/13/08, Martin Chapman <mchapman at texelinc.com> wrote:
>> > I can connect to SQLite using OGR in my application as seen in the
>> > attachment. It's kind of slow compared to shape but I can optimize
> the
>> OGR
>> > code for SQLite like I did for PostgreSQL and MySQL in my local copy
> of
>> the
>> > gdal code. Then it's as fast as shape. I really like SQLite as well.
>> What
>> > is the address of your WMS server?
>>
>> Martin,
>>
>> I am right now working on my laptop, but will soon put it on a
>> publicly accessible server. I am fooling around with a few tricks, and
>> now I am getting the entire table being selected *and* rendered in
>> about 92 ms. I think that is *very* good (this is a table of all the
>> zip code boundaries in the State of Wisconsin, US -- 1121 rows and all
>> the river segments -- > 183K rows).
>>
>> Now, could you kindly share the "optimization" of OGR code *for*
>> SQLite? Have you considered submitted it back to Frank for possible
>> inclusion in the main branch?
>>
>> Also, we could request OSGeo to set up some space for us to share
>> SQLite code, tricks, etc. In the meantime, I have bagged sqlitegis.org
>> and sqlitegis.com, and would be happy to host a wiki/list or whatever
>> crap the dreamhost allows me to host with a single-click install.
>>
>> Seriously, this the first time in years that I have been excited by
>> all this mapping business. SQLite is just such an incredibly powerful
>> tool, not for its power and performance (which is plenty fast for me)
>> but for its incredible simplicity. The fasted program for me is the
>> one that allows me to install, setup and program the fastest.
>>
>>
>> >
>> > Best regards,
>> > Martin
>> >
>> >
>> > -----Original Message-----
>> > From: postgis-users-bounces at postgis.refractions.net
>> > [mailto:postgis-users-bounces at postgis.refractions.net] On Behalf Of P
>> Kishor
>> > Sent: Sunday, April 13, 2008 1:51 PM
>> > To: PostGIS Users Discussion; Richard Greenwood
>> > Subject: Re: [postgis-users] SQLite and postGIS
>> >
>> >
>> > On 4/13/08, Richard Greenwood <richard.greenwood at gmail.com> wrote:
>> > > I should note that ogr2ogr creates a SQLite spatial table even more
>> > > easily that SpatiaLite:
>> > > ogr2ogr -f "SQLite" dest.db source.shp source
>> >
>> > Thanks Rich. You made my day. I am drawing a layer with about 185K
>> > rows, and, yes, there is a time lag, but this is out-of-the-box
>> > performance.
>> >
>> > One question -- the above ogr2ogr command, for me, does not seem to
>> > cooperate when adding multiple shapefiles to the same db. If I do like
>> > so
>> >
>> > ogr2ogr -f "SQLite" dest.db source1.shp source2.shp source3.shp
>> >
>> > I get a dest.db with 0 Kb. I got around that by creating dest1.db,
>> > dest2.db and so on, and then creating dest.db, attaching the other
>> > dbs, and moving the tables into dest.db via
>> >
>> > CREATE TABLE ... AS SELECT * FROM....
>> >
>> > This seems a bit goofy, so I am sure I am doing something wrong with
>> > the ogr command. Suggestions?
>> >
>> > Another note -- I wonder if this merits starting a separate SQLitegis
>> > mailing list? I am happy here as long as the pg users don't mind.
>> >
>> >
>> > >
>> > > Rich
>> > >
>> > >
>> > >
>> > >
>> > > On Sun, Apr 13, 2008 at 9:26 AM, Richard Greenwood
>> > > <richard.greenwood at gmail.com> wrote:
>> > > > On Fri, Apr 11, 2008 at 9:29 PM, Frank Warmerdam
>> <warmerdam at pobox.com>
>> > wrote:
>> > > > > Puneet, Rich,
>> > > > >
>> > > > > SQLite is already supported as a spatial database by OGR.
> The
>> > caveat
>> > > > > is that in GDAL 1.5 it is just using a text column with WKT
>> > geometries so
>> > > > > the spatial performance is not great.
>> > > > >
>> > > > > To use this with MapServer you would use CONNECTIONTYPE OGR
> and
>> the
>> > > > > CONNECTION string would be the path for the sqlite database.
>> The
>> > > > > DATA statement should hold the table name be accessed.
>> > > >
>> > > > Totally cool!
>> > > >
>> > > > I used SpatiaLite (http://www.gaia-gis.it/spatialite/)
>> LoadShapefile()
>> > > > function to import a shapefile into a SQLite db. The geometries
> are
>> > > > stored as BLOBs in a field named "geom" Then:
>> > > > sqlite> alter table ownership add column WKT_GEOMETRY;
>> > > > sqlite> update ownership set WKT_GEOMETRY=astext(geom);
>> > > >
>> > > > Getting MapServer to use the SQLite table was very easy. Recent
>> > > > versions of MS4W have SQLite support in GDAL. So simply adding
>> > > > CONNECTIONTYPE OGR
>> > > > CONNECTION "path/to/SQLite.db"
>> > > > gets MapServer drawing geometries from SQLite.
>> > > >
>> > > > I'm playing with a table containing about 15,000 polygons and
>> > > > performance is fine.
>> > > >
>> > > > Thanks Frank, for pointing me in the right direction.
>> > > >
>> > > > Rich
>> > > >
>>
>>
>>
>> --
>> Puneet Kishor http://punkish.eidesis.org/
>> Nelson Institute for Environmental Studies http://www.nelson.wisc.edu/
>> Open Source Geospatial Foundation (OSGeo) http://www.osgeo.org/
>>
>>
>
More information about the postgis-users
mailing list