[Liblas-commits] hg-main-tree: improved stats collection mechanism

liblas-commits at liblas.org liblas-commits at liblas.org
Thu Aug 18 20:06:47 EDT 2011


details:   http://hg.libpc.org/hg-main-tree/rev/017ef48bc1bd
changeset: 1142:017ef48bc1bd
user:      Michael P. Gerlek <mpg at flaxen.com>
date:      Thu Aug 18 17:06:27 2011 -0700
description:
improved stats collection mechanism

diffstat:

 include/pdal/filters/StatsFilter.hpp |   51 +++++++-
 src/filters/StatsFilter.cpp          |  196 ++++++++++++++++++++++------------
 test/unit/StatsFilterTest.cpp        |   33 +++--
 3 files changed, 184 insertions(+), 96 deletions(-)

diffs (truncated from 376 to 300 lines):

diff -r f449609b8009 -r 017ef48bc1bd include/pdal/filters/StatsFilter.hpp
--- a/include/pdal/filters/StatsFilter.hpp	Thu Aug 18 13:33:19 2011 -0700
+++ b/include/pdal/filters/StatsFilter.hpp	Thu Aug 18 17:06:27 2011 -0700
@@ -45,6 +45,29 @@
 
 class StatsFilterSequentialIterator;
 
+class StatsCollector
+{
+public:
+    StatsCollector();
+
+    void insert(double value);
+    void reset();
+
+    double minimum() const { return m_minimum; }
+    double maximum() const { return m_maximum; }
+    double average() const { return m_sum / (double)m_count; }
+    boost::uint64_t count() const { return m_count; }
+    
+    boost::property_tree::ptree toPTree() const;
+
+private:
+    boost::uint64_t m_count;
+    double m_minimum;
+    double m_maximum;
+    double m_sum;
+};
+
+
 // this is just a pass-thorugh filter, which collects some stats about the points
 // that are fed through it
 class PDAL_DLL StatsFilter : public Filter
@@ -54,6 +77,7 @@
 
     StatsFilter(Stage& prevStage, const Options&);
     StatsFilter(Stage& prevStage);
+    ~StatsFilter();
 
     virtual void initialize();
     virtual const Options getDefaultOptions() const;
@@ -72,19 +96,28 @@
 
     void processBuffer(PointBuffer& data) const;
 
-    // clears the counters
+    // returns the stats for the given field
+    const StatsCollector& getStats(Dimension::Field field) const;
+
+    // clears the counters for all fields
     void reset();
-    void getData(boost::uint64_t& count, 
-                 double& minx, double& miny, double& minz, 
-                 double& maxx, double& maxy, double& maxz,
-                 double& avgx, double& avgy, double& avgz) const;
+
+    // return a tree like this:
+    //    X:
+    //        count: 100
+    //        min: 1.0
+    //        max: 100.0
+    //    Y:
+    //        count: 100
+    //        min: 11.0
+    //        max: 110.0
+    //
+    boost::property_tree::ptree toStatsPTree() const;
 
 private:
+// the stats are keyed by the field name
     // BUG: not threadsafe, these should maybe live in the iterator
-    mutable boost::uint64_t m_totalPoints;
-    mutable double m_minimumX, m_minimumY, m_minimumZ;
-    mutable double m_maximumX, m_maximumY, m_maximumZ;
-    mutable double m_sumX, m_sumY, m_sumZ;
+    std::map<Dimension::Field,StatsCollector*> m_stats; // one Stats item per field in the schema
 
     StatsFilter& operator=(const StatsFilter&); // not implemented
     StatsFilter(const StatsFilter&); // not implemented
diff -r f449609b8009 -r 017ef48bc1bd src/filters/StatsFilter.cpp
--- a/src/filters/StatsFilter.cpp	Thu Aug 18 13:33:19 2011 -0700
+++ b/src/filters/StatsFilter.cpp	Thu Aug 18 17:06:27 2011 -0700
@@ -43,6 +43,64 @@
 
 namespace pdal { namespace filters {
 
+//---------------------------------------------------------------------------
+
+StatsCollector::StatsCollector()
+    : m_count(0)
+    , m_minimum(0.0)
+    , m_maximum(0.0)
+    , m_sum(0.0)
+{
+    return;
+}
+
+
+void StatsCollector::reset()
+{
+    m_count = 0;
+    m_minimum = 0.0;
+    m_maximum = 0.0;
+    m_sum = 0.0;
+    return;
+}
+
+
+void StatsCollector::insert(double value)
+{
+    if (m_count==0)
+    {
+        m_minimum = value;
+        m_maximum = value;
+        m_sum = value;
+    }
+    else
+    {
+        m_minimum = std::min(m_minimum, value);
+        m_maximum = std::max(m_maximum, value);
+        m_sum += value;
+    }
+
+    ++m_count;
+
+    return;
+}
+
+
+boost::property_tree::ptree StatsCollector::toPTree() const
+{
+    boost::property_tree::ptree tree;
+
+    tree.put("count", count());
+    tree.put("minimum", minimum());
+    tree.put("maximum", maximum());
+    tree.put("average", average());
+
+    return tree;
+}
+
+
+//---------------------------------------------------------------------------
+
 
 StatsFilter::StatsFilter(Stage& prevStage, const Options& options)
     : pdal::Filter(prevStage, options)
@@ -58,11 +116,28 @@
 }
 
 
+StatsFilter::~StatsFilter()
+{
+    Schema::Dimensions dims = getSchema().getDimensions();
+    for (Schema::DimensionsIter iter = dims.begin(); iter != dims.end(); ++iter)
+    {
+        const Dimension& dim = *iter;
+        StatsCollector* stats = m_stats[dim.getField()];
+        delete stats;
+        m_stats.erase(dim.getField());
+    }
+}
+
 void StatsFilter::initialize()
 {
     Filter::initialize();
 
-    reset();
+    const Schema::Dimensions dims = getSchema().getDimensions();
+    for (Schema::DimensionsCIter iter = dims.begin(); iter != dims.end(); ++iter)
+    {
+        const Dimension& dim = *iter;
+        m_stats[dim.getField()] = new StatsCollector();
+    }
 
     return;
 }
@@ -77,43 +152,19 @@
 
 void StatsFilter::reset()
 {
-    m_totalPoints = 0;
+    const Schema::Dimensions dims = getSchema().getDimensions();
+    for (Schema::DimensionsCIter iter = dims.begin(); iter != dims.end(); ++iter)
+    {
+        const Dimension& dim = *iter;
+        m_stats[dim.getField()]->reset();
+    }
+}
+    
 
-    m_minimumX = 0.0;
-    m_minimumY = 0.0;
-    m_minimumZ = 0.0;
-    
-    m_maximumX = 0.0;
-    m_maximumY = 0.0;
-    m_maximumZ = 0.0;
-    
-    m_sumX = 0.0;
-    m_sumY = 0;
-    m_sumZ = 0;
-
-    return;
-}
-
-
-void StatsFilter::getData(boost::uint64_t& count, 
-                          double& minx, double& miny, double& minz, 
-                          double& maxx, double& maxy, double& maxz,
-                          double& avgx, double& avgy, double& avgz) const
+const StatsCollector& StatsFilter::getStats(Dimension::Field field) const
 {
-    minx = m_minimumX;
-    miny = m_minimumY;
-    minz = m_minimumZ;
-    maxx = m_maximumX;
-    maxy = m_maximumY;
-    maxz = m_maximumZ;
-
-    avgx = m_sumX / (double)m_totalPoints;
-    avgy = m_sumY / (double)m_totalPoints;
-    avgz = m_sumZ / (double)m_totalPoints;
-
-    count = m_totalPoints;
-
-    return;
+    const StatsCollector* s = m_stats.find(field)->second;
+    return *s;
 }
 
 
@@ -124,46 +175,23 @@
     const SchemaLayout& schemaLayout = data.getSchemaLayout();
     const Schema& schema = schemaLayout.getSchema();
 
-    const int indexX = schema.getDimensionIndex(Dimension::Field_X, Dimension::Double);
-    const int indexY = schema.getDimensionIndex(Dimension::Field_Y, Dimension::Double);
-    const int indexZ = schema.getDimensionIndex(Dimension::Field_Z, Dimension::Double);
+    const int indexX = schema.getDimensionIndex(Dimension::Field_X, Dimension::Int32);
+    const int indexY = schema.getDimensionIndex(Dimension::Field_Y, Dimension::Int32);
+    const int indexZ = schema.getDimensionIndex(Dimension::Field_Z, Dimension::Int32);
+
+    StatsCollector& statsX = *(m_stats.find(Dimension::Field_X)->second);
+    StatsCollector& statsY = *(m_stats.find(Dimension::Field_Y)->second);
+    StatsCollector& statsZ = *(m_stats.find(Dimension::Field_Z)->second);
 
     for (boost::uint32_t pointIndex=0; pointIndex<numPoints; pointIndex++)
     {
-        const double x = data.getField<double>(pointIndex, indexX);
-        const double y = data.getField<double>(pointIndex, indexY);
-        const double z = data.getField<double>(pointIndex, indexZ);
+        const double x = data.getField<boost::int32_t>(pointIndex, indexX);
+        const double y = data.getField<boost::int32_t>(pointIndex, indexY);
+        const double z = data.getField<boost::int32_t>(pointIndex, indexZ);
 
-        if (m_totalPoints==0)
-        {
-            m_minimumX = x;
-            m_minimumY = y;
-            m_minimumZ = z;
-
-            m_maximumX = x;
-            m_maximumY = y;
-            m_maximumZ = z;
-
-            m_sumX = x;
-            m_sumY = y;
-            m_sumZ = z;
-        }
-        else
-        {
-            m_minimumX = std::min(m_minimumX, x);
-            m_minimumY = std::min(m_minimumY, y);
-            m_minimumZ = std::min(m_minimumZ, z);
-
-            m_maximumX = std::max(m_maximumX, x);
-            m_maximumY = std::max(m_maximumY, y);
-            m_maximumZ = std::max(m_maximumZ, z);
-
-            m_sumX += x;
-            m_sumY += y;
-            m_sumZ += z;
-        }
-
-        ++m_totalPoints;
+        statsX.insert(x);
+        statsY.insert(y);
+        statsZ.insert(z);
 
         data.setNumPoints(pointIndex+1);
     }
@@ -177,4 +205,28 @@
     return new StatsFilterSequentialIterator(*this);
 }
 
+
+boost::property_tree::ptree StatsFilter::toStatsPTree() const
+{
+    boost::property_tree::ptree tree;


More information about the Liblas-commits mailing list