[GRASS-dev] pthreads in r.mapcalc slower than without

Hamish hamish_b at yahoo.com
Sat Jun 23 19:49:40 PDT 2012


Hi,

I've just been running some benchmarks for r.mapcalc
to try and find the best method for parallelizing
a script / best way to minimize overheads.

I'd like to understand where it is useful to combine
expressions into a single r.mapcalc process, and
where it isn't, and what sort of mapcalc expressions
can best take advantage of pthreads support, and
which are not good matches for it.
Also, as with some OpenMP experiments, I'd like to know
whether it makes sense to parallelize by row (given a
target column length of 1000-3000 cells), or in some
other way (e.g. for a 1000-row-tall raster, spawn 4
threads of 250 rows each).

summary of results: r.mapcalc built without pthread
support was the fastest for my test case. When built
with pthread support, using WORKERS=1 was the fastest
option (default is 8)*. Executing r.mapcalc as three
separate processes run in parallel (backgrounded from
the shell) was the fastest of all. 1-worker grass7 x3
processes was faster than grass6.5svn.

[*] note that even with r.mapcalc built without
pthreads (make clean r.mapcalc dir + edited r.mapcalc
Makefile) it still uses more than one CPU core.
maybe because of lib/gis/counter.c(?)


test case: r.blend's 3-in-1 r.mapcalc expression,
with a region of rows: 2355, cols: 3045

test machine: 6-core CPU (full cores, not hyperthreaded ones)

timing results follow below.


thanks for any ideas,
Hamish

----
(hoping linewrap doesn't mess it up)

# grass7, spearfish
# Shared settings for the r.mapcalc benchmark commands that follow.
# First input map; its r#/g#/b# terms are weighted by PERCENT/100.
FIRST=elevation.10m
# Second input map; its r#/g#/b# terms are weighted by (1 - PERCENT/100).
SECOND=aspect
# Blend weight of FIRST, in percent.
PERCENT=35
# Basename of the result maps: $OUTPUT.r, $OUTPUT.g and $OUTPUT.b.
OUTPUT=tmp.mpclc
# Test region: (n-s)/res = 2355 rows, (e-w)/res = 3045 cols.
g.region n=4323650 s=4318940 w=289020 e=295110 res=2

# Time the combined 3-in-1 r.mapcalc expression once per WORKERS value, 1..14.
# WORKERS is set only in the environment of the timed r.mapcalc call, so it
# does not leak into the rest of the shell session.
for w in $(seq 14) ; do
   # printf instead of "echo -n": echo's option handling is not portable.
   printf 'Workers: %s' "$w"
   time WORKERS=$w  r.mapcalc "$OUTPUT.r = r#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * r#$SECOND ; \
        $OUTPUT.g = g#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * g#$SECOND ; \
        $OUTPUT.b = b#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * b#$SECOND" \
        --overwrite --quiet
   # Blank line between the timing reports of successive runs.
   echo
done

# three replicates shown columnwise
Workers: 1
real	0m5.711s	real	0m5.716s	real	0m5.672s
user	0m7.544s	user	0m7.552s	user	0m7.596s
sys	0m0.332s	sys	0m0.348s	sys	0m0.260s

Workers: 2
real	0m5.987s	real	0m6.142s	real	0m5.976s
user	0m7.028s	user	0m7.224s	user	0m7.028s
sys	0m0.584s	sys	0m0.596s	sys	0m0.560s

Workers: 3
real	0m6.300s	real	0m6.180s	real	0m6.184s
user	0m7.512s	user	0m7.408s	user	0m7.488s
sys	0m0.620s	sys	0m0.612s	sys	0m0.604s

Workers: 4
real	0m6.693s	real	0m6.605s	real	0m6.642s
user	0m7.780s	user	0m7.632s	user	0m7.612s
sys	0m0.856s	sys	0m0.840s	sys	0m0.888s

Workers: 5
real	0m6.279s	real	0m6.203s	real	0m6.246s
user	0m7.496s	user	0m7.528s	user	0m7.484s
sys	0m0.996s	sys	0m0.920s	sys	0m0.972s

Workers: 6
real	0m6.231s	real	0m6.378s	real	0m6.180s
user	0m7.588s	user	0m7.712s	user	0m7.656s
sys	0m1.028s	sys	0m0.980s	sys	0m0.924s

Workers: 7
real	0m6.080s	real	0m6.114s	real	0m6.148s
user	0m7.616s	user	0m7.588s	user	0m7.704s
sys	0m1.040s	sys	0m1.096s	sys	0m1.016s

Workers: 8
real	0m6.181s	real	0m6.294s	real	0m6.251s
user	0m7.700s	user	0m7.820s	user	0m7.756s
sys	0m1.100s	sys	0m1.096s	sys	0m1.140s

Workers: 9
real	0m6.307s	real	0m6.263s	real	0m6.269s
user	0m7.772s	user	0m7.784s	user	0m7.732s
sys	0m1.196s	sys	0m1.160s	sys	0m1.172s

Workers: 10
real	0m6.333s	real	0m6.333s	real	0m6.278s
user	0m7.720s	user	0m7.696s	user	0m7.752s
sys	0m1.272s	sys	0m1.304s	sys	0m1.204s

Workers: 11
real	0m6.315s	real	0m6.322s	real	0m6.292s
user	0m7.820s	user	0m7.760s	user	0m7.820s
sys	0m1.192s	sys	0m1.232s	sys	0m1.160s

Workers: 12
real	0m6.275s	real	0m6.307s	real	0m6.322s
user	0m7.724s	user	0m7.732s	user	0m7.824s
sys	0m1.240s	sys	0m1.256s	sys	0m1.192s

Workers: 13
real	0m6.324s	real	0m6.316s	real	0m6.301s
user	0m7.816s	user	0m7.852s	user	0m7.784s
sys	0m1.204s	sys	0m1.176s	sys	0m1.192s

Workers: 14
real	0m6.333s	real	0m6.316s	real	0m6.291s
user	0m7.668s	user	0m7.796s	user	0m7.840s
sys	0m1.336s	sys	0m1.224s	sys	0m1.156s


# Break the combined expression up: one r.mapcalc process per output map
# (.r, .g, .b), started one after the other and timed as a group.
# Overwrite and quiet mode are requested through the environment here.
export WORKERS=1 GRASS_OVERWRITE=1 GRASS_VERBOSE=0
time (
  for band in r g b ; do
    r.mapcalc "$OUTPUT.$band = $band#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * $band#$SECOND"
  done
)
real	0m5.650s	real	0m5.700s	real	0m5.727s
user	0m7.428s	user	0m7.596s	user	0m7.468s
sys	0m0.392s	sys	0m0.312s	sys	0m0.432s



# Parallelize with the shell itself: start the three per-map r.mapcalc
# jobs in the background, then wait for all of them inside the timed subshell.
export WORKERS=1
time (
  for band in r g b ; do
    r.mapcalc "$OUTPUT.$band = $band#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * $band#$SECOND" &
  done
  wait
)
real	0m2.110s	real	0m2.115s	real	0m2.088s
user	0m8.121s	user	0m8.241s	user	0m8.229s
sys	0m0.544s	sys	0m0.476s	sys	0m0.408s



#same, but with r.mapcalc recompiled without pthreads libs
export WORKERS=1
real	0m2.100s	real	0m2.086s	real	0m2.095s
user	0m8.049s	user	0m8.037s	user	0m8.093s
sys	0m0.528s	sys	0m0.472s	sys	0m0.468s



# Same backgrounded three-process run, with r.mapcalc recompiled without
# pthreads libs, this time with WORKERS=8.
export WORKERS=8
time (
  for band in r g b ; do
    r.mapcalc "$OUTPUT.$band = $band#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * $band#$SECOND" &
  done
  wait
)
real	0m2.455s	real	0m2.466s	real	0m2.457s
user	0m8.229s	user	0m8.125s	user	0m8.225s
sys	0m0.940s	sys	0m1.072s	sys	0m0.956s


#same, for grass 6.5svn:
real	0m3.805s	real	0m3.651s	real	0m3.689s
user	0m10.481s	user	0m10.533s	user	0m10.557s
sys	0m0.484s	sys	0m0.372s	sys	0m0.400s



More information about the grass-dev mailing list