[geos-devel] Performance enhance in Parsing WKT data

strk at refractions.net strk at refractions.net
Thu Apr 14 07:47:30 EDT 2005


Thanks a lot, Cheng!
I've applied your patch with a minor modification
(removed the default constructor of StringTokenizer
so it fully works with a reference and makes no
copies at all).

If you have tests handy, please run them.
Thanks
--strk;

On Thu, Apr 14, 2005 at 06:23:17PM +0800, Cheng Shan wrote:
> Hello,
>     These days I have been using GEOS to parse WKT
> format data, and I found that the performance decreases
> dramatically when loading big geometries. In my tests,
> it takes 24~30 seconds to load 337 geometries, but it
> needs only about 2~3 seconds when using JTS. I suspected
> that something was wrong.
>     After about two days of study, I found that the class
> "StringTokenizer" allocates and deallocates memory
> frequently.
> So I made some modifications to the class. Now it takes
> 3~4 seconds to load the same data, and the tests passed
> without any problem.
>     I also changed the type of the function
> WKTReader::read's parameter "wellKnownText" from
> "string" to "const string&"; this avoids copying
> the argument.
>     Maybe this work is useful, and I would like to
> share it with everyone who uses the GEOS project. The
> following is the diff:
> ? build
> Index: source/headers/geos/io.h
> ===================================================================
> RCS file:
> /home/cvs/postgis/geos/source/headers/geos/io.h,v
> retrieving revision 1.5
> diff -u -r1.5 io.h
> --- source/headers/geos/io.h 19 Jul 2004 10:33:12
> -0000 1.5
> +++ source/headers/geos/io.h 14 Apr 2005 09:16:40
> -0000
> @@ -92,16 +92,17 @@
>    TT_WORD
>   };
>   StringTokenizer();
> - StringTokenizer(string txt);
> + StringTokenizer(const string& txt);
>   ~StringTokenizer();
>   int nextToken();
>   int peekNextToken();
>   double getNVal();
>   string getSVal();
>  private:
> - string str;
> + const string str;
>   string stok;
>   double ntok;
> + string::const_iterator iter;
>  };
>  
>  /**
> @@ -125,7 +126,7 @@
>   ~WKTReader();
>  
>   /// Parse a WKT string returning a Geometry
> - Geometry* read(string wellKnownText);
> + Geometry* read(const string& wellKnownText);
>  
>  // Geometry* read(Reader reader); //Not implemented
> yet
>  
> Index: source/io/StringTokenizer.cpp
> ===================================================================
> RCS file:
> /home/cvs/postgis/geos/source/io/StringTokenizer.cpp,v
> retrieving revision 1.11
> diff -u -r1.11 StringTokenizer.cpp
> --- source/io/StringTokenizer.cpp 2 Jul 2004 13:28:27
> -0000 1.11
> +++ source/io/StringTokenizer.cpp 14 Apr 2005 09:16:42
> -0000
> @@ -39,59 +39,57 @@
>  
>  namespace geos {
>  
> -StringTokenizer::StringTokenizer(){
> - str="";
> +StringTokenizer::StringTokenizer()
> + :str("") {
>   stok="";
>   ntok=0.0;
> + iter=str.begin();
>  }
> -StringTokenizer::StringTokenizer(string txt) {
> - str=txt;
> +StringTokenizer::StringTokenizer(const string& txt)
> + : str(txt) {
>   stok="";
>   ntok=0.0;
> + iter=str.begin();
>  }
>  StringTokenizer::~StringTokenizer(){}
>  
> -int StringTokenizer::nextToken(){
> +int StringTokenizer::nextToken() {
>   string tok="";
> - if (str.size()==0)
> + if (iter==str.end())
>    return StringTokenizer::TT_EOF;
> - switch(str[0]) {
> -  case '(':
> -   str=str.substr(1);
> -   return '(';
> -  case ')':
> -   str=str.substr(1);
> -   return ')';
> -  case ',':
> -   str=str.substr(1);
> -   return ',';
> -  case '\n':
> -  case '\r':
> -  case '\t':
> -  case ' ':
> -   string::size_type pos=str.find_first_not_of("
> \n\r\t");
> -   if (pos==string::npos) {
> -    return StringTokenizer::TT_EOF;
> -   } else {
> -    str=str.substr(pos);
> -    return nextToken();
> -   }
> -  }
> - string::size_type pos=str.find_first_of("\n\r\t()
> ,");
> + switch (*iter) {
> + case '(':
> + case ')':
> + case ',':
> +  return *iter++;
> + case '\n':
> + case '\r':
> + case '\t':
> + case ' ':
> +  string::size_type pos=str.find_first_not_of("
> \n\r\t",iter-str.begin());
> +  if (pos==string::npos) {
> +   return StringTokenizer::TT_EOF;
> +  } else {
> +   iter=str.begin()+pos;
> +   return nextToken();
> +  } 
> + }
> + 
> + string::size_type pos=str.find_first_of("\n\r\t()
> ,", iter-str.begin());
>   if (pos==string::npos) {
> -  if (str.size()>0) {
> -   tok=str.substr(0);
> -   str="";
> +  if (iter!=str.end()) {
> +   tok.assign(iter,str.end());
> +   iter=str.end();
>    } else {
>     return StringTokenizer::TT_EOF;
>    }
>   } else {
> -  tok=str.substr(0,pos);
> -  str=str.substr(pos);
> +  tok.assign(iter,str.begin()+pos);
> +  iter=str.begin()+pos;
>   }
> - char *stopstring;
> + char* stopstring;
>   double dbl=strtod(tok.c_str(),&stopstring);
> - if (strcmp(stopstring,"")==0) {
> + if (*stopstring=='\0') {
>    ntok=dbl;
>    stok="";
>    return StringTokenizer::TT_NUMBER;
> @@ -102,35 +100,35 @@
>   }
>  }
>  
> -int StringTokenizer::peekNextToken(){
> +int StringTokenizer::peekNextToken() {
>   string::size_type pos;
>   string tok="";
> - if (str.size()==0)
> + if (iter==str.end())
> +  return StringTokenizer::TT_EOF;
> + 
> + pos=str.find_first_not_of("
> \r\n\t",iter-str.begin());
> + if (pos==string::npos) 
>    return StringTokenizer::TT_EOF;
> -
> - pos=str.find_first_not_of(" \r\n\t");
> - if (pos==string::npos) return
> StringTokenizer::TT_EOF;
>   switch(str[pos]) {
>    case '(':
> -   return '(';
>    case ')':
> -   return ')';
>    case ',':
> -   return ',';
> +   return str[pos];
>   }
> - pos=str.find_first_of("\n\r\t() ,");
> +
> + pos=str.find_first_of("\n\r\t()
> ,",iter-str.begin());
>   if (pos==string::npos) {
> -  if (str.size()>0) {
> -   tok=str.substr(0);
> +  if (iter!=str.end()) {
> +   tok.assign(iter,str.end());
>    } else {
>     return StringTokenizer::TT_EOF;
>    }
>   } else {
> -  tok=str.substr(0,pos);
> +  tok.assign(iter,str.end());
>   }
>   char *stopstring;
>   double dbl=strtod(tok.c_str(),&stopstring);
> - if (strcmp(stopstring,"")==0) {
> + if (*stopstring=='\0') {
>    ntok=dbl;
>    stok="";
>    return StringTokenizer::TT_NUMBER;
> Index: source/io/WKTReader.cpp
> ===================================================================
> RCS file:
> /home/cvs/postgis/geos/source/io/WKTReader.cpp,v
> retrieving revision 1.30
> diff -u -r1.30 WKTReader.cpp
> --- source/io/WKTReader.cpp 8 Dec 2004 13:54:43 -0000
> 1.30
> +++ source/io/WKTReader.cpp 14 Apr 2005 09:16:42 -0000
> @@ -33,7 +33,7 @@
>   //delete geometryFactory;
>  }
>  
> -Geometry* WKTReader::read(string wellKnownText){
> +Geometry* WKTReader::read(const string&
> wellKnownText){
>   auto_ptr<StringTokenizer> tokenizer(new
> StringTokenizer(wellKnownText));
>   StringTokenizer *st=tokenizer.release();
>   Geometry *g=NULL;
> 
> 
> _________________________________________________________
> Do You Yahoo!?
> 150万曲MP3疯狂搜，带您闯入音乐殿堂
> http://cn.rd.yahoo.com/mail_cn/tag/yisou/music/*http://music.yisou.com/
> 美女明星应有尽有，搜遍美图、艳图和酷图
> http://cn.rd.yahoo.com/mail_cn/tag/yisou/image/*http://image.yisou.com
> 1G就是1000兆，雅虎电邮自助扩容！
> http://cn.rd.yahoo.com/mail_cn/tag/1g/*http://cn.mail.yahoo.com/event/mail_1g/
> _______________________________________________
> geos-devel mailing list
> geos-devel at geos.refractions.net
> http://geos.refractions.net/mailman/listinfo/geos-devel



More information about the geos-devel mailing list