[geos-devel] Performance enhance in Parsing WKT data
Cheng Shan
sd_shancheng at yahoo.com.cn
Thu Apr 14 06:23:17 EDT 2005
Hello,
Recently I have been using GEOS to parse WKT
format data, and I found that performance decreases
dramatically when loading big geometries. In my tests,
it takes 24~30 seconds to load 337 geometries, but it
needs only about 2~3 seconds when using JTS. I suspected
that something was wrong.
After about two days of study, I found that the class
"StringTokenizer" allocates and deallocates memory
frequently: each time a token is consumed, it re-copies
the remaining input with str.substr().
So I made some modifications to the class. Now it takes
3~4 seconds to load the same data, and the tests pass
without any problem.
I also changed the type of the "wellKnownText"
parameter of WKTReader::read from
"string" to "const string&"; this avoids copying
the argument.
Maybe this work is useful, and I would like to
share it with everyone who uses the GEOS project. The
following is the diff:
? build
Index: source/headers/geos/io.h
===================================================================
RCS file:
/home/cvs/postgis/geos/source/headers/geos/io.h,v
retrieving revision 1.5
diff -u -r1.5 io.h
--- source/headers/geos/io.h 19 Jul 2004 10:33:12
-0000 1.5
+++ source/headers/geos/io.h 14 Apr 2005 09:16:40
-0000
@@ -92,16 +92,17 @@
TT_WORD
};
StringTokenizer();
- StringTokenizer(string txt);
+ StringTokenizer(const string& txt);
~StringTokenizer();
int nextToken();
int peekNextToken();
double getNVal();
string getSVal();
private:
- string str;
+ const string str;
string stok;
double ntok;
+ string::const_iterator iter;
};
/**
@@ -125,7 +126,7 @@
~WKTReader();
/// Parse a WKT string returning a Geometry
- Geometry* read(string wellKnownText);
+ Geometry* read(const string& wellKnownText);
// Geometry* read(Reader reader); //Not implemented
yet
Index: source/io/StringTokenizer.cpp
===================================================================
RCS file:
/home/cvs/postgis/geos/source/io/StringTokenizer.cpp,v
retrieving revision 1.11
diff -u -r1.11 StringTokenizer.cpp
--- source/io/StringTokenizer.cpp 2 Jul 2004 13:28:27
-0000 1.11
+++ source/io/StringTokenizer.cpp 14 Apr 2005 09:16:42
-0000
@@ -39,59 +39,57 @@
namespace geos {
-StringTokenizer::StringTokenizer(){
- str="";
+StringTokenizer::StringTokenizer()
+ :str("") {
stok="";
ntok=0.0;
+ iter=str.begin();
}
-StringTokenizer::StringTokenizer(string txt) {
- str=txt;
+StringTokenizer::StringTokenizer(const string& txt)
+ : str(txt) {
stok="";
ntok=0.0;
+ iter=str.begin();
}
StringTokenizer::~StringTokenizer(){}
-int StringTokenizer::nextToken(){
+int StringTokenizer::nextToken() {
string tok="";
- if (str.size()==0)
+ if (iter==str.end())
return StringTokenizer::TT_EOF;
- switch(str[0]) {
- case '(':
- str=str.substr(1);
- return '(';
- case ')':
- str=str.substr(1);
- return ')';
- case ',':
- str=str.substr(1);
- return ',';
- case '\n':
- case '\r':
- case '\t':
- case ' ':
- string::size_type pos=str.find_first_not_of("
\n\r\t");
- if (pos==string::npos) {
- return StringTokenizer::TT_EOF;
- } else {
- str=str.substr(pos);
- return nextToken();
- }
- }
- string::size_type pos=str.find_first_of("\n\r\t()
,");
+ switch (*iter) {
+ case '(':
+ case ')':
+ case ',':
+ return *iter++;
+ case '\n':
+ case '\r':
+ case '\t':
+ case ' ':
+ string::size_type pos=str.find_first_not_of("
\n\r\t",iter-str.begin());
+ if (pos==string::npos) {
+ return StringTokenizer::TT_EOF;
+ } else {
+ iter=str.begin()+pos;
+ return nextToken();
+ }
+ }
+
+ string::size_type pos=str.find_first_of("\n\r\t()
,", iter-str.begin());
if (pos==string::npos) {
- if (str.size()>0) {
- tok=str.substr(0);
- str="";
+ if (iter!=str.end()) {
+ tok.assign(iter,str.end());
+ iter=str.end();
} else {
return StringTokenizer::TT_EOF;
}
} else {
- tok=str.substr(0,pos);
- str=str.substr(pos);
+ tok.assign(iter,str.begin()+pos);
+ iter=str.begin()+pos;
}
- char *stopstring;
+ char* stopstring;
double dbl=strtod(tok.c_str(),&stopstring);
- if (strcmp(stopstring,"")==0) {
+ if (*stopstring=='\0') {
ntok=dbl;
stok="";
return StringTokenizer::TT_NUMBER;
@@ -102,35 +100,35 @@
}
}
-int StringTokenizer::peekNextToken(){
+int StringTokenizer::peekNextToken() {
string::size_type pos;
string tok="";
- if (str.size()==0)
+ if (iter==str.end())
+ return StringTokenizer::TT_EOF;
+
+ pos=str.find_first_not_of("
\r\n\t",iter-str.begin());
+ if (pos==string::npos)
return StringTokenizer::TT_EOF;
-
- pos=str.find_first_not_of(" \r\n\t");
- if (pos==string::npos) return
StringTokenizer::TT_EOF;
switch(str[pos]) {
case '(':
- return '(';
case ')':
- return ')';
case ',':
- return ',';
+ return str[pos];
}
- pos=str.find_first_of("\n\r\t() ,");
+
+ pos=str.find_first_of("\n\r\t()
,",iter-str.begin());
if (pos==string::npos) {
- if (str.size()>0) {
- tok=str.substr(0);
+ if (iter!=str.end()) {
+ tok.assign(iter,str.end());
} else {
return StringTokenizer::TT_EOF;
}
} else {
- tok=str.substr(0,pos);
+ tok.assign(iter,str.end());
}
char *stopstring;
double dbl=strtod(tok.c_str(),&stopstring);
- if (strcmp(stopstring,"")==0) {
+ if (*stopstring=='\0') {
ntok=dbl;
stok="";
return StringTokenizer::TT_NUMBER;
Index: source/io/WKTReader.cpp
===================================================================
RCS file:
/home/cvs/postgis/geos/source/io/WKTReader.cpp,v
retrieving revision 1.30
diff -u -r1.30 WKTReader.cpp
--- source/io/WKTReader.cpp 8 Dec 2004 13:54:43 -0000
1.30
+++ source/io/WKTReader.cpp 14 Apr 2005 09:16:42 -0000
@@ -33,7 +33,7 @@
//delete geometryFactory;
}
-Geometry* WKTReader::read(string wellKnownText){
+Geometry* WKTReader::read(const string&
wellKnownText){
auto_ptr<StringTokenizer> tokenizer(new
StringTokenizer(wellKnownText));
StringTokenizer *st=tokenizer.release();
Geometry *g=NULL;
_________________________________________________________
Do You Yahoo!?
150万曲MP3疯狂搜，带您闯入音乐殿堂
http://cn.rd.yahoo.com/mail_cn/tag/yisou/music/*http://music.yisou.com/
美女明星应有尽有，搜遍美图、艳图和酷图
http://cn.rd.yahoo.com/mail_cn/tag/yisou/image/*http://image.yisou.com
1G就是1000兆，雅虎电邮自助扩容！
http://cn.rd.yahoo.com/mail_cn/tag/1g/*http://cn.mail.yahoo.com/event/mail_1g/
More information about the geos-devel
mailing list