[geos-devel] Performance enhance in Parsing WKT data

Cheng Shan sd_shancheng at yahoo.com.cn
Thu Apr 14 06:23:17 EDT 2005


Hello,
    These days I have been using GEOS to parse WKT
format data, and I found that the performance decreases
dramatically when loading big geometries. In my test,
it takes 24~30 seconds to load 337 geometries, but it
needs only about 2~3 seconds when using JTS. I suspected
that something was wrong.
    After about two days of study, I found that the class
"StringTokenizer" allocates and deallocates memory
frequently.
So I made some modifications to the class. Now it takes
3~4 seconds to load the same data, and the tests pass
without any problem.
    I also changed the type of the "wellKnownText"
parameter of the function WKTReader::read from
"string" to "const string&"; this avoids copying
the argument.
    Maybe this work is useful, and I would like to
share it with everyone who uses the GEOS project. The
following is the diff:
? build
Index: source/headers/geos/io.h
===================================================================
RCS file:
/home/cvs/postgis/geos/source/headers/geos/io.h,v
retrieving revision 1.5
diff -u -r1.5 io.h
--- source/headers/geos/io.h 19 Jul 2004 10:33:12
-0000 1.5
+++ source/headers/geos/io.h 14 Apr 2005 09:16:40
-0000
@@ -92,16 +92,17 @@
   TT_WORD
  };
  StringTokenizer();
- StringTokenizer(string txt);
+ StringTokenizer(const string& txt);
  ~StringTokenizer();
  int nextToken();
  int peekNextToken();
  double getNVal();
  string getSVal();
 private:
- string str;
+ const string str;
  string stok;
  double ntok;
+ string::const_iterator iter;
 };
 
 /**
@@ -125,7 +126,7 @@
  ~WKTReader();
 
  /// Parse a WKT string returning a Geometry
- Geometry* read(string wellKnownText);
+ Geometry* read(const string& wellKnownText);
 
 // Geometry* read(Reader reader); //Not implemented
yet
 
Index: source/io/StringTokenizer.cpp
===================================================================
RCS file:
/home/cvs/postgis/geos/source/io/StringTokenizer.cpp,v
retrieving revision 1.11
diff -u -r1.11 StringTokenizer.cpp
--- source/io/StringTokenizer.cpp 2 Jul 2004 13:28:27
-0000 1.11
+++ source/io/StringTokenizer.cpp 14 Apr 2005 09:16:42
-0000
@@ -39,59 +39,57 @@
 
 namespace geos {
 
-StringTokenizer::StringTokenizer(){
- str="";
+StringTokenizer::StringTokenizer()
+ :str("") {
  stok="";
  ntok=0.0;
+ iter=str.begin();
 }
-StringTokenizer::StringTokenizer(string txt) {
- str=txt;
+StringTokenizer::StringTokenizer(const string& txt)
+ : str(txt) {
  stok="";
  ntok=0.0;
+ iter=str.begin();
 }
 StringTokenizer::~StringTokenizer(){}
 
-int StringTokenizer::nextToken(){
+int StringTokenizer::nextToken() {
  string tok="";
- if (str.size()==0)
+ if (iter==str.end())
   return StringTokenizer::TT_EOF;
- switch(str[0]) {
-  case '(':
-   str=str.substr(1);
-   return '(';
-  case ')':
-   str=str.substr(1);
-   return ')';
-  case ',':
-   str=str.substr(1);
-   return ',';
-  case '\n':
-  case '\r':
-  case '\t':
-  case ' ':
-   string::size_type pos=str.find_first_not_of("
\n\r\t");
-   if (pos==string::npos) {
-    return StringTokenizer::TT_EOF;
-   } else {
-    str=str.substr(pos);
-    return nextToken();
-   }
-  }
- string::size_type pos=str.find_first_of("\n\r\t()
,");
+ switch (*iter) {
+ case '(':
+ case ')':
+ case ',':
+  return *iter++;
+ case '\n':
+ case '\r':
+ case '\t':
+ case ' ':
+  string::size_type pos=str.find_first_not_of("
\n\r\t",iter-str.begin());
+  if (pos==string::npos) {
+   return StringTokenizer::TT_EOF;
+  } else {
+   iter=str.begin()+pos;
+   return nextToken();
+  } 
+ }
+ 
+ string::size_type pos=str.find_first_of("\n\r\t()
,", iter-str.begin());
  if (pos==string::npos) {
-  if (str.size()>0) {
-   tok=str.substr(0);
-   str="";
+  if (iter!=str.end()) {
+   tok.assign(iter,str.end());
+   iter=str.end();
   } else {
    return StringTokenizer::TT_EOF;
   }
  } else {
-  tok=str.substr(0,pos);
-  str=str.substr(pos);
+  tok.assign(iter,str.begin()+pos);
+  iter=str.begin()+pos;
  }
- char *stopstring;
+ char* stopstring;
  double dbl=strtod(tok.c_str(),&stopstring);
- if (strcmp(stopstring,"")==0) {
+ if (*stopstring=='\0') {
   ntok=dbl;
   stok="";
   return StringTokenizer::TT_NUMBER;
@@ -102,35 +100,35 @@
  }
 }
 
-int StringTokenizer::peekNextToken(){
+int StringTokenizer::peekNextToken() {
  string::size_type pos;
  string tok="";
- if (str.size()==0)
+ if (iter==str.end())
+  return StringTokenizer::TT_EOF;
+ 
+ pos=str.find_first_not_of("
\r\n\t",iter-str.begin());
+ if (pos==string::npos) 
   return StringTokenizer::TT_EOF;
-
- pos=str.find_first_not_of(" \r\n\t");
- if (pos==string::npos) return
StringTokenizer::TT_EOF;
  switch(str[pos]) {
   case '(':
-   return '(';
   case ')':
-   return ')';
   case ',':
-   return ',';
+   return str[pos];
  }
- pos=str.find_first_of("\n\r\t() ,");
+
+ pos=str.find_first_of("\n\r\t()
,",iter-str.begin());
  if (pos==string::npos) {
-  if (str.size()>0) {
-   tok=str.substr(0);
+  if (iter!=str.end()) {
+   tok.assign(iter,str.end());
   } else {
    return StringTokenizer::TT_EOF;
   }
  } else {
-  tok=str.substr(0,pos);
+  tok.assign(iter,str.end());
  }
  char *stopstring;
  double dbl=strtod(tok.c_str(),&stopstring);
- if (strcmp(stopstring,"")==0) {
+ if (*stopstring=='\0') {
   ntok=dbl;
   stok="";
   return StringTokenizer::TT_NUMBER;
Index: source/io/WKTReader.cpp
===================================================================
RCS file:
/home/cvs/postgis/geos/source/io/WKTReader.cpp,v
retrieving revision 1.30
diff -u -r1.30 WKTReader.cpp
--- source/io/WKTReader.cpp 8 Dec 2004 13:54:43 -0000
1.30
+++ source/io/WKTReader.cpp 14 Apr 2005 09:16:42 -0000
@@ -33,7 +33,7 @@
  //delete geometryFactory;
 }
 
-Geometry* WKTReader::read(string wellKnownText){
+Geometry* WKTReader::read(const string&
wellKnownText){
  auto_ptr<StringTokenizer> tokenizer(new
StringTokenizer(wellKnownText));
  StringTokenizer *st=tokenizer.release();
  Geometry *g=NULL;


_________________________________________________________
Do You Yahoo!?
150万曲MP3疯狂搜，带您闯入音乐殿堂
http://cn.rd.yahoo.com/mail_cn/tag/yisou/music/*http://music.yisou.com/
美女明星应有尽有，搜遍美图、艳图和酷图
http://cn.rd.yahoo.com/mail_cn/tag/yisou/image/*http://image.yisou.com
1G就是1000兆，雅虎电邮自助扩容！
http://cn.rd.yahoo.com/mail_cn/tag/1g/*http://cn.mail.yahoo.com/event/mail_1g/



More information about the geos-devel mailing list