[postgis-users] Fast access to PostGIS from c/c++
Boehm, Andreas
boehm at rif.fuedo.de
Thu Dec 7 02:22:15 PST 2006
Hi Mark,
> Can you post your test cases for both MakePoint() and
> GeometryFromText()? Over how many points are you testing?
there are 3 different test cases
1) Inserting with executing "GeometryFromText()"
2) Inserting with executing "PREPARE insert_pt ..." and "EXECUTE" as you
have proposed
3) Inserting using libpqxx's prepared functionality
Case 1 and 2 are similar. No. 3 is the one witch takes about 60 % more
time.
I've made tests with 10,000, 100,000 and - for case 1 - with 1,000,000
points.
Until now I've left the database configuration untouched.
I'll post the complete source. Thanks a lot!
Andi
#include <iostream>
#include <time.h>
#include <assert.h>
#include <math.h>
#include <pqxx/connection>
#include <pqxx/transaction>
#include <pqxx/nontransaction>
#include <pqxx/tablewriter>
#include <pqxx/tablereader>
using namespace PGSTD;
using namespace pqxx;
// Table names
const string POINTS_TABLE = "points";
// Range and sizes
const int POINT_COUNT = 100000;
const int X_MAX = 10000;
const int Y_MAX = 10000;
// Forwards
void DropOldTable(connection &c, const string &tableName);
void CreateNewTable(connection &c, const string &tableName, const string
&geomType);
void CreateIndex(connection &c, const string &tableName);
clock_t PrintElapsedTime(clock_t start);
// Insertion
void InsertPointsFromTxt(connection &c)
{
cout << "Inserting " << to_string(POINT_COUNT)
<< " points via GeometryFromText()..." << endl;
clock_t start = clock();
// Insert POINT_COUNT points
work *t;
srand(42); // instead of "(int) time(NULL)" for
deterministically results
double x, y;
string execStr = "";
for (int i = 0; i < POINT_COUNT;)
{
// If string is to small, double the capacity
if (execStr.size() == execStr.capacity())
execStr.reserve(4 + execStr.capacity() * 2);
// RAND_MAX is 32767 only. For a huge number of points
use rand() twice
x = ((double) rand() + ((double) rand() / RAND_MAX))
/ (RAND_MAX + 1) * X_MAX;
y = ((double) rand() + ((double) rand() / RAND_MAX))
/ (RAND_MAX + 1) * Y_MAX;
// Add new Point
execStr += "INSERT INTO " + POINTS_TABLE + " VALUES ( "
+
to_string(i) + // Primary Key
", GeometryFromText('POINT (" +
to_string(x) + " " + to_string(y) + //
coordinates
")', -1) ); ";
// Exectute and commit every 10000 objects
if ((++i) % 10000 == 0)
{
t = new work(c, "insert_points");
t->exec(execStr);
t->commit();
delete t;
execStr = "";
cerr << ".";
}
}
// Execute the rest
if (execStr.length() > 0)
{
t = new work(c, "insert_points");
t->exec(execStr);
t->commit();
delete t;
}
cerr << "#"<<endl;
PrintElapsedTime(start);
}
void InsertPointsMakePtPrep(connection &c)
{
cout << "Inserting " << to_string(POINT_COUNT)
<< " points via GeometryMakePt() and PREPARE ..." <<
endl;
clock_t start = clock();
// Prepare plan
work *t = new work(c, "insert_points");
string execStr = "PREPARE insert_pt(int, float, float) AS INSERT
INTO " +
POINTS_TABLE + " VALUES ($1, MakePoint($2, $3))";
t->exec(execStr);
t->commit();
delete t;
// Insert POINT_COUNT points
srand(42); // instead of "(int) time(NULL)" for
deterministically results
double x, y;
execStr = "";
for (int i = 0; i < POINT_COUNT;)
{
// If string is to small, double the capacity
if (execStr.size() == execStr.capacity())
execStr.reserve(4 + execStr.capacity() * 2);
// RAND_MAX is 32767 only. For a huge number of points
use rand() twice
x = ((double) rand() + ((double) rand() / RAND_MAX))
/ (RAND_MAX + 1) * X_MAX;
y = ((double) rand() + ((double) rand() / RAND_MAX))
/ (RAND_MAX + 1) * Y_MAX;
// Add new Point
execStr += "EXECUTE insert_pt(" + to_string(i) + ","
+ to_string(x) + "," + to_string(y) + ");";
// Exectute and commit every 10000 objects
if ((++i) % 10000 == 0)
{
t = new work(c, "insert_points");
t->exec(execStr);
t->commit();
delete t;
execStr = "";
cerr << ".";
}
}
// Execute the rest, unprepare
t = new work(c, "insert_points");
t->exec(execStr + "DEALLOCATE insert_pt;");
t->commit();
delete t;
cerr << "#"<<endl;
PrintElapsedTime(start);
}
void InsertPointsMakePt(connection &c)
{
cout << "Inserting " << to_string(POINT_COUNT)
<< " points via MakePt() and libpqxx's prepared..." <<
endl;
clock_t start = clock();
// Prepare Statement
string prepNameWritePt = "InsertPoints";
string prepStr = "INSERT INTO " + POINTS_TABLE +
" VALUES ($1, MakePoint($2, $3))";
c.prepare(prepNameWritePt, prepStr)
("integer", pqxx::prepare::treat_direct)
("float", pqxx::prepare::treat_direct)
("float", pqxx::prepare::treat_direct);
// New transaction
work *t;
t = new work(c, "insert_points");
// Insert POINT_COUNT points
srand(42); // instead of "(int) time(NULL)" for
deterministically results
double x, y;
for (int i = 0; i < POINT_COUNT;)
{
// RAND_MAX is 32767 only. For a huge number of points
use rand() twice
x = ((double) rand() + ((double) rand() / RAND_MAX))
/ (RAND_MAX + 1) * X_MAX;
y = ((double) rand() + ((double) rand() / RAND_MAX))
/ (RAND_MAX + 1) * Y_MAX;
t->prepared(prepNameWritePt)(i)(x)(y).exec();
// every 10000 points start a new transaction
if (++i % 10000 == 0)
{
t->commit();
delete t;
t = new work(c, "insert_points");
cerr << ".";
}
}
// Commit, unprepare
t->commit();
delete t;
c.unprepare(prepNameWritePt);
cerr << "#"<<endl;
PrintElapsedTime(start);
}
// Main
int main(int argc, char *argv[])
{
try
{
// Set up a connection to the backend
connection c("host=geoserver port=5432 dbname=postgis\
user=postgres password=***");
// Creating Points using GeometryFromText()
// Drop old table, create new one, insert objects
cout << "*** Creating points using GeometryFromText ***"
<< endl;
DropOldTable(c, POINTS_TABLE);
CreateNewTable(c, POINTS_TABLE, "POINT");
CreateIndex(c, POINTS_TABLE);
InsertPointsFromTxt(c);
// Creating Points using InsertPointsMakePtPrep()
cout << "*** Creating points using
InsertPointsMakePtPrep ***" << endl;
DropOldTable(c, POINTS_TABLE);
CreateNewTable(c, POINTS_TABLE, "POINT");
CreateIndex(c, POINTS_TABLE);
InsertPointsMakePtPrep(c);
// Create points using MakePt()
cout << "*** Creating points using MakePt() ***" <<
endl;
DropOldTable(c, POINTS_TABLE);
CreateNewTable(c, POINTS_TABLE, "POINT");
CreateIndex(c, POINTS_TABLE);
InsertPointsMakePt(c);
cout << "*** End ***" << endl;
}
catch (const sql_error &e)
{
// If we're interested in the text of a failed query, we
can write
// separate exception handling code for this type of
exception
cerr << "SQL error: " << e.what() << endl
<< "Query was: '" << e.query() << "'" <<
endl;
return 1;
}
catch (const exception &e)
{
// All exceptions thrown by libpqxx are derived from
std::exception
cerr << "Exception: " << e.what() << endl;
return 2;
}
catch (...)
{
// This is really unexpected (see above)
cerr << "Unhandled exception" << endl;
return 100;
}
return 0;
}
// Helpers
void DropOldTable(connection &c, const string &tableName)
{
cout << "Dropping old table '" + tableName + "' ..." << endl;
clock_t start = clock();
try
{
// First create a separate transaction to drop old
table, if any.
// This mayfail if the table didn't previously exist.
work dropTrans(c, "drop_" + tableName);
// Using PostGIS delete statement instead of 'DROP
TABLE'.
// This will delete any geometry column before dropping
table.
dropTrans.exec("SELECT DropGeometryTable('" + tableName
+ "')");
dropTrans.commit();
}
catch (const undefined_table &e)
{
cout << "(Expected) Couldn't drop table: " << e.what()
<< endl
<< "Query was: " << e.query() << endl;
}
catch (const sql_error &e)
{
cerr << "Couldn't drop table: " << e.what() << endl
<< "Query was: " << e.query() << endl;
}
PrintElapsedTime(start);
}
void CreateNewTable(connection &c, const string &tableName,
const string &geomType)
{
cout << "Creating new table '" << tableName << "' ..." << endl;
clock_t start = clock();
// New transaction
work t(c, "create_" + tableName);
// Create table
t.exec("CREATE TABLE " + tableName + "(id integer PRIMARY KEY)");
// Add geometry column
t.exec("SELECT AddGeometryColumn('" + tableName +
"', 'geom', -1, '" + geomType + "', 2)");
// Commit
t.commit();
PrintElapsedTime(start);
}
void CreateIndex(connection &c, const string &tableName)
{
cout << "Creating index for '" << tableName << "' ..." << endl;
clock_t start = clock();
// Don't use a transaction, because CREATE INDEX could run into
a timeout.
// VACUUM cann't run in a transaction block, either.
// Ergo use nontransaction.
nontransaction t(c, "index_" + tableName);
t.exec("CREATE INDEX " + tableName + "_index ON " + tableName +
" USING GIST ( geom GIST_GEOMETRY_OPS )");
t.exec("VACUUM ANALYZE " + tableName + " (geom)");
// Commit
t.commit();
PrintElapsedTime(start);
}
clock_t PrintElapsedTime(clock_t start)
{
clock_t end = clock();
double durationClock = (double)(end - start) / CLOCKS_PER_SEC;
cout << "... elapsed time: " << durationClock << " s." << endl;
return end;
}
More information about the postgis-users
mailing list