[postgis-users] Fast access to PostGIS from c/c++

Boehm, Andreas boehm at rif.fuedo.de
Thu Dec 7 02:22:15 PST 2006


Hi Mark,

> Can you post your test cases for both MakePoint() and 
> GeometryFromText()? Over how many points are you testing? 

there are 3 different test cases
1) Inserting with executing "GeometryFromText()"
2) Inserting with executing "PREPARE insert_pt ..." and "EXECUTE" as you
have proposed
3) Inserting using libpqxx's prepared functionality

Case 1 and 2 are similar. No. 3 is the one witch takes about 60 % more
time.
I've made tests with 10,000, 100,000 and - for case 1 - with 1,000,000
points.
Until now I've left the database configuration untouched.

I'll post the complete source. Thanks a lot!
Andi


#include <iostream>
#include <time.h>
#include <assert.h>
#include <math.h>

#include <pqxx/connection>
#include <pqxx/transaction>
#include <pqxx/nontransaction>
#include <pqxx/tablewriter>
#include <pqxx/tablereader>

using namespace PGSTD;
using namespace pqxx;

// Table names
const string POINTS_TABLE = "points";

// Range and sizes
const int POINT_COUNT = 100000;
const int X_MAX = 10000;
const int Y_MAX = 10000;

// Forwards
void DropOldTable(connection &c, const string &tableName);
void CreateNewTable(connection &c, const string &tableName, const string
&geomType);
void CreateIndex(connection &c, const string &tableName);
clock_t PrintElapsedTime(clock_t start);

// Insertion
void InsertPointsFromTxt(connection &c)
{
	cout << "Inserting " << to_string(POINT_COUNT) 
		<< " points via GeometryFromText()..." << endl;
	clock_t start = clock();

	// Insert POINT_COUNT points
	work *t;
	srand(42); // instead of "(int) time(NULL)" for
deterministically results
	double x, y;
	string execStr = "";
	for (int i = 0; i < POINT_COUNT;)
	{
		// If string is to small, double the capacity 
		if (execStr.size() == execStr.capacity())
			execStr.reserve(4 + execStr.capacity() * 2);

		// RAND_MAX is 32767 only. For a huge number of points
use rand() twice 
		x = ((double) rand() + ((double) rand() / RAND_MAX)) 
			/ (RAND_MAX + 1) * X_MAX;
		y = ((double) rand() + ((double) rand() / RAND_MAX)) 
			/ (RAND_MAX + 1) * Y_MAX;

		// Add new Point
		execStr += "INSERT INTO " + POINTS_TABLE + " VALUES ( "
+
			to_string(i) +		// Primary Key
			", GeometryFromText('POINT (" +
			to_string(x) + " " + to_string(y) +	//
coordinates
			")', -1) ); ";

		// Exectute and commit every 10000 objects
		if ((++i) % 10000 == 0)
		{
			t = new work(c, "insert_points");
			t->exec(execStr);
			t->commit();
			delete t;
			execStr = "";
			cerr << ".";
		}
	}

	// Execute the rest
	if (execStr.length() > 0)
	{
		t = new work(c, "insert_points");
		t->exec(execStr);
		t->commit();
		delete t;
	}

	cerr << "#"<<endl;
	PrintElapsedTime(start);
}

void InsertPointsMakePtPrep(connection &c)
{
	cout << "Inserting " << to_string(POINT_COUNT) 
		<< " points via GeometryMakePt() and PREPARE ..." <<
endl;
	clock_t start = clock();

	// Prepare plan
	work *t = new work(c, "insert_points");
	string execStr = "PREPARE insert_pt(int, float, float) AS INSERT
INTO " +
		POINTS_TABLE + " VALUES ($1, MakePoint($2, $3))";
	t->exec(execStr);
	t->commit();
	delete t;

	// Insert POINT_COUNT points
	srand(42); // instead of "(int) time(NULL)" for
deterministically results
	double x, y;
	execStr = "";
	for (int i = 0; i < POINT_COUNT;)
	{
		// If string is to small, double the capacity 
		if (execStr.size() == execStr.capacity())
			execStr.reserve(4 + execStr.capacity() * 2);

		// RAND_MAX is 32767 only. For a huge number of points
use rand() twice 
		x = ((double) rand() + ((double) rand() / RAND_MAX)) 
			/ (RAND_MAX + 1) * X_MAX;
		y = ((double) rand() + ((double) rand() / RAND_MAX)) 
			/ (RAND_MAX + 1) * Y_MAX;

		// Add new Point
		execStr += "EXECUTE insert_pt(" + to_string(i) + "," 
			+ to_string(x) + "," + to_string(y) + ");";

		// Exectute and commit every 10000 objects
		if ((++i) % 10000 == 0)
		{
			t = new work(c, "insert_points");
			t->exec(execStr);
			t->commit();
			delete t;
			execStr = "";
			cerr << ".";
		}
	}

	// Execute the rest, unprepare
	t = new work(c, "insert_points");
	t->exec(execStr + "DEALLOCATE insert_pt;");
	t->commit();
	delete t;

	cerr << "#"<<endl;
	PrintElapsedTime(start);
}

void InsertPointsMakePt(connection &c)
{
	cout << "Inserting " << to_string(POINT_COUNT) 
		<< " points via MakePt() and libpqxx's prepared..." <<
endl;
	clock_t start = clock();

	// Prepare Statement
	string prepNameWritePt = "InsertPoints";
	string prepStr = "INSERT INTO " + POINTS_TABLE + 
		" VALUES ($1, MakePoint($2, $3))";
	c.prepare(prepNameWritePt, prepStr)
		("integer", pqxx::prepare::treat_direct)
		("float", pqxx::prepare::treat_direct)
		("float", pqxx::prepare::treat_direct);

	// New transaction
	work *t;
	t = new work(c, "insert_points");

	// Insert POINT_COUNT points
	srand(42); // instead of "(int) time(NULL)" for
deterministically results
	double x, y;
	for (int i = 0; i < POINT_COUNT;)
	{
		// RAND_MAX is 32767 only. For a huge number of points
use rand() twice 
		x = ((double) rand() + ((double) rand() / RAND_MAX)) 
			/ (RAND_MAX + 1) * X_MAX;
		y = ((double) rand() + ((double) rand() / RAND_MAX)) 
			/ (RAND_MAX + 1) * Y_MAX;
		t->prepared(prepNameWritePt)(i)(x)(y).exec();

		// every 10000 points start a new transaction
		if (++i % 10000 == 0)
		{
			t->commit();
			delete t;
			t = new work(c, "insert_points");
			cerr << ".";
		}
	}

	// Commit, unprepare
	t->commit();
	delete t;
	c.unprepare(prepNameWritePt);

	cerr << "#"<<endl;
	PrintElapsedTime(start);
}


// Main
int main(int argc, char *argv[])
{
	try
	{
		// Set up a connection to the backend
		connection c("host=geoserver port=5432 dbname=postgis\
			user=postgres password=***");

		// Creating Points using GeometryFromText()
		// Drop old table, create new one, insert objects
		cout << "*** Creating points using GeometryFromText ***"
<< endl;
		DropOldTable(c, POINTS_TABLE);
		CreateNewTable(c, POINTS_TABLE, "POINT");
		CreateIndex(c, POINTS_TABLE);
		InsertPointsFromTxt(c);

		// Creating Points using InsertPointsMakePtPrep()
		cout << "*** Creating points using
InsertPointsMakePtPrep ***" << endl;
		DropOldTable(c, POINTS_TABLE);
		CreateNewTable(c, POINTS_TABLE, "POINT");
		CreateIndex(c, POINTS_TABLE);
		InsertPointsMakePtPrep(c);

		// Create points using MakePt()
		cout << "*** Creating points using MakePt() ***" <<
endl;
		DropOldTable(c, POINTS_TABLE);
		CreateNewTable(c, POINTS_TABLE, "POINT");
		CreateIndex(c, POINTS_TABLE);
		InsertPointsMakePt(c);

		cout << "*** End ***" << endl;
	}
	catch (const sql_error &e)
	{
		// If we're interested in the text of a failed query, we
can write 
		// separate exception handling code for this type of
exception
		cerr << "SQL error: " << e.what() << endl
				<< "Query was: '" << e.query() << "'" <<
endl;
		return 1;
	}
	catch (const exception &e)
	{
		// All exceptions thrown by libpqxx are derived from
std::exception
		cerr << "Exception: " << e.what() << endl;
		return 2;
	}
	catch (...)
	{
		// This is really unexpected (see above)
		cerr << "Unhandled exception" << endl;
		return 100;
	}

	return 0;
}

// Helpers
void DropOldTable(connection &c, const string &tableName)
{
	cout << "Dropping old table '" + tableName + "' ..." << endl;
	clock_t start = clock();

	try
	{
		// First create a separate transaction to drop old
table, if any.  
		// This mayfail if the table didn't previously exist.
		work dropTrans(c, "drop_" + tableName);

		// Using PostGIS delete statement instead of 'DROP
TABLE'. 
		// This will delete any geometry column before dropping
table.
		dropTrans.exec("SELECT DropGeometryTable('" + tableName
+ "')");
		dropTrans.commit();
	}
	catch (const undefined_table &e)
	{
		cout << "(Expected) Couldn't drop table: " << e.what()
<< endl
		<< "Query was: " << e.query() << endl;
	}
	catch (const sql_error &e)
	{
		cerr << "Couldn't drop table: " << e.what() << endl
		<< "Query was: " << e.query() << endl;
	}

	PrintElapsedTime(start);
}

void CreateNewTable(connection &c, const string &tableName, 
					const string &geomType)
{
	cout << "Creating new table '" << tableName << "' ..." << endl;
	clock_t start = clock();

	// New transaction
	work t(c, "create_" + tableName);

	// Create table
    t.exec("CREATE TABLE " + tableName + "(id integer PRIMARY KEY)");

	// Add geometry column
	t.exec("SELECT AddGeometryColumn('" + tableName + 
		"', 'geom', -1, '" + geomType + "', 2)");

	// Commit
	t.commit();

	PrintElapsedTime(start);
}

void CreateIndex(connection &c, const string &tableName)
{
	cout << "Creating index for '" << tableName << "' ..." << endl;
	clock_t start = clock();

	// Don't use a transaction, because CREATE INDEX could run into
a timeout.
	// VACUUM cann't run in a transaction block, either.
	// Ergo use nontransaction.
	nontransaction t(c, "index_" + tableName);

	t.exec("CREATE INDEX " + tableName + "_index ON " + tableName + 
		" USING GIST ( geom GIST_GEOMETRY_OPS )");
	t.exec("VACUUM ANALYZE " + tableName + " (geom)");

	// Commit
	t.commit();

	PrintElapsedTime(start);
}

clock_t PrintElapsedTime(clock_t start)
{
	clock_t end = clock();
	double durationClock = (double)(end - start) / CLOCKS_PER_SEC;
	cout << "... elapsed time: " << durationClock << " s." << endl;
	return end;
}



More information about the postgis-users mailing list