[postgis-users] grouping points by time offset

Kurt Schwehr schwehr at ccom.unh.edu
Sun Jul 1 13:15:35 PDT 2007


>
> 6.  "Indexes", not "keys", are what (sometimes) speed up queries.

Thanks!

> HTH and let us all know how you finally do fix it.

Okay... So indexes make a huge difference.  My G4 laptop has been  
chugging on my python solution for about 5 hours now and is about 1/3  
through the ships.  In the meantime, I tried creating an index on  
userid on a quad core G5 box with a copy of the same 4+ million  
record database and it finished somewhere less than a couple  
minutes... I wasn't ready for it to finish :)

Would be good to figure out how do this within the server... that  
would let me make a trigger to updates as I receive new data to look  
for new transits/update the last transit for a ship.

Now I need to work out the MakeLine()...

-kurt

psql ais_2006
ais_2006=# CREATE INDEX userid_idx ON position (userid);
took about 30 seconds...

ais_2006=# \d position
...
Indexes:
     "position_pkey" PRIMARY KEY, btree ("key")
     "userid_idx" btree (userid)
...

date; (nohup ./pg_create_transit_table.py -C -d ais_2006 &)
Sun Jul  1 15:55:56 EDT 2007

echo "select count(distinct(id)) from transit;" | psql ais_2006
count
-------
   3448
(1 row)

I was expecting over 5K transits so I may have a bug somewhere, but  
here is what I have done in python...

#!/usr/bin/env python

__version__ = '$Revision: 4791 $'.split()[1]

import os,sys

if __name__=='__main__':
	from optparse import OptionParser
	parser = OptionParser(usage="%prog [options] ",version="%prog  
"+__version__)
	parser.add_option('-d','--database- 
name',dest='databaseName',default='ais',
			  help='Name of database within the postgres server [default: % 
default]')
	parser.add_option('-D','--database- 
host',dest='databaseHost',default='localhost',
			  help='Host name of the computer serving the dbx [default: % 
default]')
         defaultUser = os.getlogin()
	parser.add_option('-u','--database- 
user',dest='databaseUser',default=defaultUser,
			  help='Host name of the to access the database with [default: % 
default]')
	parser.add_option('-C','--with- 
create',dest='createTables',default=False, action='store_true',
			  help='Do not create the tables in the database')

	parser.add_option('-t','--delta-time',dest='deltaT'
                           ,default=60*60
                           ,type='int'
			  ,help='Time gap in seconds that determines when a new transit  
starts [default: %default]')

	(options,args) = parser.parse_args()

         import psycopg2 as psycopg
         deltaT = options.deltaT
         connectStr = "dbname='"+options.databaseName+"'  
user='"+options.databaseUser+"' host='"+options.databaseHost+"'"
         cx = psycopg.connect(connectStr)
         cu = cx.cursor()

         if options.createTables:
             cu.execute('''
CREATE TABLE transit
(
   id serial NOT NULL,
   userid integer NOT NULL,
   startpos integer NOT NULL,
   endpos integer NOT NULL,
   CONSTRAINT transit_pkey PRIMARY KEY (id)
);
''')
             cx.commit()


	# BEGIN algorithm to find transits...

         cu.execute('SELECT DISTINCT(userid) FROM position;');
         ships= [ship[0] for ship in cu.fetchall()]
         print ships
         for ship in ships:
             print 'Processing ship: ',ship
             cu.execute('SELECT key,cg_sec FROM position WHERE userid= 
%s ORDER BY cg_sec',(ship,))
             startKey,startTime=cu.fetchone()
             print startKey,startTime
             lastKey,lastTime=startKey,startTime
             needFinal=True
             # Now go through the rest of the ship position records
             for row in cu.fetchall():
                 needFinal=True
                 key,time = row
                 if time>lastTime+deltaT:
                     print  'FOUND',startKey,startTime,'- 
 >',lastKey,lastTime
                     cu.execute('INSERT INTO transit  
(userid,startPos,endPos) VALUES (%s,%s,%s);',(ship,startKey,lastKey))
                     startKey,startTime=key,time
                     needFinal=False
                 lastKey,lastTime=key,time # Save for the next loop
                 #sys.exit()

             if needFinal:
                 print 'Final transit...'
                 print 'FOUND',startKey,startTime,'->',lastKey,lastTime
                 cu.execute('INSERT INTO transit  
(userid,startPos,endPos) VALUES (%s,%s,%s);',(ship,startKey,lastKey))
             cx.commit()




More information about the postgis-users mailing list