<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<style><!--
/* Font Definitions */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;
        mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#0563C1;
        text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
        {mso-style-priority:99;
        color:#954F72;
        text-decoration:underline;}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0cm;
        margin-right:0cm;
        margin-bottom:0cm;
        margin-left:36.0pt;
        margin-bottom:.0001pt;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;
        mso-fareast-language:EN-US;}
span.EmailStyle17
        {mso-style-type:personal-compose;
        font-family:"Arial",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-family:"Calibri",sans-serif;
        mso-fareast-language:EN-US;}
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:1188373440;
        mso-list-type:hybrid;
        mso-list-template-ids:-1794878010 134807567 134807577 134807579 134807567 134807577 134807579 134807567 134807577 134807579;}
@list l0:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l0:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l0:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l1
        {mso-list-id:1519659712;
        mso-list-type:hybrid;
        mso-list-template-ids:-657832082 -1341992582 134807555 134807557 134807553 134807555 134807557 134807553 134807555 134807557;}
@list l1:level1
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Symbol;
        mso-fareast-font-family:Calibri;
        mso-bidi-font-family:"Times New Roman";}
@list l1:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l1:level3
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l1:level4
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l1:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l1:level6
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l1:level7
        {mso-level-number-format:bullet;
        mso-level-text:\F0B7;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Symbol;}
@list l1:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:"Courier New";}
@list l1:level9
        {mso-level-number-format:bullet;
        mso-level-text:\F0A7;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l2
        {mso-list-id:2023315444;
        mso-list-type:hybrid;
        mso-list-template-ids:-108881578 134807567 134807577 134807579 134807567 134807577 134807579 134807567 134807577 134807579;}
@list l2:level1
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l2:level2
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l2:level3
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l2:level4
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l2:level5
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l2:level6
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
@list l2:level7
        {mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l2:level8
        {mso-level-number-format:alpha-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-18.0pt;}
@list l2:level9
        {mso-level-number-format:roman-lower;
        mso-level-tab-stop:none;
        mso-level-number-position:right;
        text-indent:-9.0pt;}
ol
        {margin-bottom:0cm;}
ul
        {margin-bottom:0cm;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-GB" link="#0563C1" vlink="#954F72">
<div class="WordSection1">
<p class="MsoNormal"><a name="x__MailAutoSig"><span style="font-family:"Arial",sans-serif">Hi,<o:p></o:p></span></a></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">I have a large (global, 30m resolution, 50GB+) GeoTIFF dataset, from which I need to read many (millions of) pixel values at given input coordinates.
 I’ve got reasonable performance out of the code, about a million queries over five minutes, but:<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<ol style="margin-top:0cm" start="1" type="1">
<li class="MsoListParagraph" style="margin-left:0cm;mso-list:l0 level1 lfo1"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">There are actually twelve separate datasets of this size to query, not just one, so it takes
 approximately an hour.<o:p></o:p></span></span></li><li class="MsoListParagraph" style="margin-left:0cm;mso-list:l0 level1 lfo1"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">This is by far the slowest portion of the program, and the users demand speed!<o:p></o:p></span></span></li><li class="MsoListParagraph" style="margin-left:0cm;mso-list:l0 level1 lfo1"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">The users would also like to move towards higher resolution datasets, which we see run about
 5x slower.<o:p></o:p></span></span></li><li class="MsoListParagraph" style="margin-left:0cm;mso-list:l0 level1 lfo1"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">When querying the data on a particular piece of network storage mounted as part of the local
 filesystem, we see a slowdown approaching two orders of magnitude – bulk file copies off the network storage are reasonable, but each IO request shows a significant overhead (up to a second), and GDAL is sending one for each coordinate queried.<o:p></o:p></span></span></li></ol>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">The implementation is in Python, directly calling down to GDAL. The short but long-running snippet of code that actually queries the dataset,
 having converted real-world coordinates to pixels, is:<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">value_arrays = (<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">            raster_ds.ReadAsArray(<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">                        xoff=coord[0] - buffer_size,<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">                        yoff=coord[1] - buffer_size,<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">                        xsize=npix,<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">                        ysize=npix<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">            ) for coord in offsets<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">)<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">There are a few things that are probably worth noting:<o:p></o:p></span></span></p>
<ol style="margin-top:0cm" start="1" type="1">
<li class="MsoListParagraph" style="margin-left:0cm;mso-list:l2 level1 lfo2"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">It is not necessarily a single pixel that is being read – for each coordinate, the program may
 be asked to get all pixel values within a given radius (typically a couple of pixels), and use some function to summarise these into a single value (mean, median, …). GDAL currently returns a numpy array for each query, which is passed to the user-specified
 function after the snippet above.<o:p></o:p></span></span></li><li class="MsoListParagraph" style="margin-left:0cm;mso-list:l2 level1 lfo2"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">The dataset is made up of 2048x2048 LZW-Compressed tiles containing floating point data (essentially
 conforming to COG, but with no overviews), grouped together in a VRT (performance is identical with plain GeoTIFFs, though).<o:p></o:p></span></span></li></ol>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">Multiprocessing has not been found to help - we actually lose throughput as the disk read head is moving back and forth constantly. Better hardware (especially
 SSDs) is known to help, but no one wants to pay for that.<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">We see no particular performance difference from setting GDAL_DISABLE_READDIR_ON_OPEN=TRUE, and GDAL_CACHEMAX is left at the default 5% (64GB+ RAM available).<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">Does the Python interface to GDAL provide a way to supply a large number of offsets and get blocks of pixels back, avoiding the need to come back up
 to Python after each query? (I suspect not)<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif">Is there some way to optimise GDAL so that queries of files on the mounted network storage are more efficient?<o:p></o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><span style="font-family:"Arial",sans-serif"><o:p> </o:p></span></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><b><span style="font-family:"Arial",sans-serif;color:black;mso-fareast-language:EN-GB"><o:p> </o:p></span></b></span></p>
<p class="MsoNormal"><span style="mso-bookmark:x__MailAutoSig"><b><span style="font-family:"Arial",sans-serif;color:black;mso-fareast-language:EN-GB">Dr. Daniel Evans</span></b></span><span style="mso-bookmark:x__MailAutoSig"></span><span style="font-family:"Arial",sans-serif;color:black;mso-fareast-language:EN-GB"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:#F6A124;mso-fareast-language:EN-GB">Software Developer</span><span style="font-family:"Arial",sans-serif;color:black;mso-fareast-language:EN-GB"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:#F6A124;mso-fareast-language:EN-GB"> </span><span style="font-family:"Arial",sans-serif;color:black;mso-fareast-language:EN-GB"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:black;mso-fareast-language:EN-GB"><a href="sip:daniel.evans@jbarisk.com"><b><span style="color:#F6A124">Skype</span></b></a><o:p></o:p></span></p>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<p><strong><span style="color: #f6a125; font-family:arial;">T</span></strong><span style="text-decoration: none; color: #000000; font-family:arial;"> +44 (0) 1756 799919</span><br>
<a style="text-decoration: none; color: #f6a125; font-family:arial;" href="http://www.jbarisk.com">www.jbarisk.com</a></p>
<p><a href="http://www.jbarisk.com"><img src="http://www.jbagroup.co.uk/imgstore/JBA-Email-Sig-Icons-JBA.png" alt="Visit our website" width="33" height="26"></a> <a href="https://www.linkedin.com/company/2370847"><img src="http://www.jbagroup.co.uk/imgstore/JBA-Email-Sig-Icons-LINKEDIN.png" alt="Follow us on LinkedIn" height="26"></a>
<a href="https://twitter.com/jbarisk"><img src="http://www.jbagroup.co.uk/imgstore/JBA-Email-Sig-Icons-TWITTER.png" alt="Follow us on Twitter" width="33" height="26"></a></p>
<o:p></o:p>
<p></p>
<p class="MsoNormal"><span style="font-family:"Arial",sans-serif;color:#FF9C00">Our postal address and registered office is JBA Risk Management</span><span style="font-family:"Arial",sans-serif;color:black">
</span><span style="font-family:"Arial",sans-serif;color:#FF9C00">Limited, 1 Broughton Park, Old Lane North, Broughton, Skipton, North Yorkshire BD23 3FD.</span><o:p></o:p></p>
<p><b><span style="background: white; color: black; font-family: "Arial",sans-serif; font-size: 10pt;">Find out more about us here:
<a href="http://www.jbarisk.com/"><font color="#0563c1">www.jbarisk.com</font></a> and
</span></b><b><span style="background: white; color: rgb(68, 68, 68); font-family: "Arial",sans-serif; font-size: 10pt;"><a href="http://twitter.com/JBARisk" target="_blank"><font color="#0563c1">follow us on Twitter @JBARisk</font></a> and
<a href="https://www.linkedin.com/company/2370847?trk=tyah&amp;trkInfo=clickedVertical%3Acompany%2CclickedEntityId%3A2370847%2Cidx%3A2-1-2%2CtarId%3A1447414259786%2Ctas%3AJBA%20RISK%20MANAGEMENT">
<font color="#0563c1">LinkedIn</font></a> </span></b></p>
<p><span style="background: white; color: black; font-family: "Arial",sans-serif; font-size: 8pt;">The JBA Group supports the JBA Trust.</span></p>
<p style="margin: 0cm 0cm 0pt;"><span style="background: white; color: rgb(68, 68, 68); font-family: "Arial",sans-serif; font-size: 8pt;">All JBA Risk Management's email messages contain confidential information and are intended only for the individual(s) named.
 If you are not the named addressee you should not disseminate, distribute or copy this e-mail.</span><br>
<span style="color: rgb(68, 68, 68); font-family: "Arial",sans-serif; font-size: 8pt;"><span style="background: white;">Please notify the sender immediately by email if you have received this email by mistake and delete this email from your system.</span></span><br>
 </p>
<p style="margin: 0cm 0cm 0pt;"><span style="color: rgb(68, 68, 68); font-family: "Arial",sans-serif; font-size: 8pt;"><span style="background: white;">JBA Risk Management Limited is registered in England, company number 07732946, 1 Broughton Park, Old Lane
 North, Broughton, Skipton, North Yorkshire, BD23 3FD, </span></span><span style="background: white; color: black; font-family: "Arial",sans-serif; font-size: 8pt;">Telephone: +441756799919</span></p>
<p> </p>
</body>
</html>