[Liblas-commits] hg: Index support for very large files of 1 billion points or more

liblas-commits at liblas.org liblas-commits at liblas.org
Mon Oct 25 14:03:18 EDT 2010


changeset 397aeabc3e2e in /Volumes/Data/www/liblas.org/hg
details: http://hg.liblas.org/hg?cmd=changeset;node=397aeabc3e2e
summary: Index support for very large files of 1 billion points or more. 64 bit numeric values used for temporary file offsets.

diffstat:

 include/liblas/detail/index/indexcell.hpp |   7 ++--
 include/liblas/lasindex.hpp               |   3 +-
 src/detail/index/indexcell.cpp            |   4 +-
 src/lasindex.cpp                          |  43 +++++++++++++++---------------
 4 files changed, 29 insertions(+), 28 deletions(-)

diffs (201 lines):

diff -r 02df82d848f4 -r 397aeabc3e2e include/liblas/detail/index/indexcell.hpp
--- a/include/liblas/detail/index/indexcell.hpp	Mon Oct 25 10:40:08 2010 -0500
+++ b/include/liblas/detail/index/indexcell.hpp	Mon Oct 25 12:02:28 2010 -0600
@@ -55,6 +55,7 @@
 typedef boost::uint8_t	ConsecPtAccumulator;
 typedef std::map<boost::uint32_t, ConsecPtAccumulator> IndexCellData;
 typedef std::map<boost::uint32_t, IndexCellData> IndexSubCellData;
+typedef boost::uint64_t	TempFileOffsetType;
 
 class IndexCell
 {
@@ -62,7 +63,7 @@
 	IndexCell();
 	
 private:
-	boost::uint32_t m_FileOffset;
+	TempFileOffsetType m_FileOffset;
 	boost::uint32_t m_NumPoints;
 	ElevExtrema m_MinZ, m_MaxZ;
 	IndexCellData m_PtRecords;
@@ -70,9 +71,9 @@
 	IndexSubCellData m_SubCellRecords;
 
 public:
-	void SetFileOffset(boost::uint32_t fos);
+	void SetFileOffset(TempFileOffsetType fos);
 	void SetNumPoints(boost::uint32_t nmp);
-	boost::uint32_t GetFileOffset(void) const;
+	TempFileOffsetType GetFileOffset(void) const;
 	boost::uint32_t GetNumRecords(void) const;
 	boost::uint32_t GetNumPoints(void) const;
 	boost::uint32_t GetNumSubCellRecords(void) const;
diff -r 02df82d848f4 -r 397aeabc3e2e include/liblas/lasindex.hpp
--- a/include/liblas/lasindex.hpp	Mon Oct 25 10:40:08 2010 -0500
+++ b/include/liblas/lasindex.hpp	Mon Oct 25 12:02:28 2010 -0600
@@ -140,7 +140,8 @@
 	int m_debugOutputLevel;
 	boost::uint8_t m_versionMajor, m_versionMinor;
     boost::uint32_t m_pointRecordsCount, m_maxMemoryUsage, m_cellsX, m_cellsY, m_cellsZ, m_totalCells, 
-		m_tempFileWrittenBytes, m_DataVLR_ID;
+		m_DataVLR_ID;
+    liblas::detail::TempFileOffsetType m_tempFileWrittenBytes;
     double m_rangeX, m_rangeY, m_rangeZ, m_cellSizeZ, m_cellSizeX, m_cellSizeY;
 	std::string m_tempFileName;	
 	std::string m_indexAuthor;
diff -r 02df82d848f4 -r 397aeabc3e2e src/detail/index/indexcell.cpp
--- a/src/detail/index/indexcell.cpp	Mon Oct 25 10:40:08 2010 -0500
+++ b/src/detail/index/indexcell.cpp	Mon Oct 25 12:02:28 2010 -0600
@@ -57,7 +57,7 @@
 } // IndexCell::IndexCell
 
 
-void IndexCell::SetFileOffset(boost::uint32_t fos)
+void IndexCell::SetFileOffset(TempFileOffsetType fos)
 {
 	m_FileOffset = fos;
 } // IndexCell::SetFileOffset
@@ -67,7 +67,7 @@
 	m_NumPoints = nmp;
 } // IndexCell::SetNumPoints
 
-boost::uint32_t IndexCell::GetFileOffset(void) const
+TempFileOffsetType IndexCell::GetFileOffset(void) const
 {
 	return(m_FileOffset);
 } // IndexCell::GetFileOffset
diff -r 02df82d848f4 -r 397aeabc3e2e src/lasindex.cpp
--- a/src/lasindex.cpp	Mon Oct 25 10:40:08 2010 -0500
+++ b/src/lasindex.cpp	Mon Oct 25 12:02:28 2010 -0600
@@ -122,7 +122,8 @@
 	m_DataVLR_ID = 43;
 	m_maxMemoryUsage = LIBLAS_INDEX_MAXMEMDEFAULT;
     m_rangeX = m_rangeY = m_rangeZ = m_cellSizeZ = m_cellSizeX = m_cellSizeY = 
-    m_pointRecordsCount = m_maxMemoryUsage = m_cellsX = m_cellsY = m_cellsZ = m_totalCells = m_tempFileWrittenBytes = 0;
+		m_pointRecordsCount = m_maxMemoryUsage = m_cellsX = m_cellsY = m_cellsZ = m_totalCells = 0;
+    m_tempFileWrittenBytes = 0;
 	m_indexBuilt = m_tempFileStarted = m_readerCreated = false;
 } // Index::SetValues
 
@@ -1025,8 +1026,7 @@
 	m_versionMinor = LIBLAS_INDEX_VERSIONMINOR;
 	
 	// reset to beginning of point data records in case points had been examined before index is built
-	m_reader->Reset();
-
+	m_reader->seek(0);
 	// need the header to get number of point records
     m_pointRecordsCount = m_pointheader.GetPointRecordsCount();
     // get the bounds of the data and scale factors in case they are needed for point translation
@@ -1164,11 +1164,9 @@
 		// If a cell contains too many points, subdivide the cell and save sub-cells within the cell structure
 		// If Z-binning is desired, define the bounds of each Z zone and subdivide sort each cell's points into Z bins
 		// Save Z bins within the cell structure.
-		// reset Reader to beginning of point data records in case points had been examined before index is built
 		
 		if (IndexOut.InitiateOutput())
 		{
-			m_reader->Reset();
 			for (boost::uint32_t x = 0; x < m_cellsX; ++x)
 			{
 				for (boost::uint32_t y = 0; y < m_cellsY; ++y)
@@ -1384,7 +1382,7 @@
 {
 	if (m_tempFile || OpenTempFile())
 	{
-		boost::uint32_t EmptyOffset = 0;	// this might not be large enough
+		liblas::detail::TempFileOffsetType EmptyOffset = 0;	// this might not be large enough
 		
 		if (! m_tempFileStarted)
 		{
@@ -1392,12 +1390,12 @@
 			// write out a block of file offsets the size of the number of cells
 			for (boost::uint32_t i = 0; i < m_totalCells; ++i)
 			{
-				if (fwrite(&EmptyOffset, sizeof(boost::uint32_t), 1, m_tempFile) < 1)
+				if (fwrite(&EmptyOffset, sizeof(liblas::detail::TempFileOffsetType), 1, m_tempFile) < 1)
 				{
 					return (FileError("Index::PurgePointsToTempFile"));
 				} // if error
 			} // for
-			m_tempFileWrittenBytes = m_totalCells * sizeof(boost::uint32_t);
+			m_tempFileWrittenBytes = m_totalCells * sizeof(liblas::detail::TempFileOffsetType);
 			m_tempFileStarted = true;
 		} // if
 		for (boost::uint32_t x = 0; x < m_cellsX; ++x)
@@ -1411,20 +1409,20 @@
 					// if cell block header is 0 write the current file location in the file header
 					// otherwise write the current file location at the file location specified in the 
 					// cell block header
-					boost::uint32_t LastWriteLocation = CellBlock[x][y].GetFileOffset();
+					liblas::detail::TempFileOffsetType LastWriteLocation = CellBlock[x][y].GetFileOffset();
 					if (LastWriteLocation == 0)
-						LastWriteLocation = (x * m_cellsY + y) * sizeof(boost::uint32_t);
-					fseek(m_tempFile, LastWriteLocation, SEEK_SET);
-					if (fwrite(&m_tempFileWrittenBytes, sizeof(boost::uint32_t), 1, m_tempFile) < 1)
+						LastWriteLocation = (x * m_cellsY + y) * sizeof(liblas::detail::TempFileOffsetType);
+					_fseeki64(m_tempFile, LastWriteLocation, SEEK_SET);
+					if (fwrite(&m_tempFileWrittenBytes, sizeof(liblas::detail::TempFileOffsetType), 1, m_tempFile) < 1)
 						return (FileError("Index::PurgePointsToTempFile"));
 					CellBlock[x][y].SetFileOffset(m_tempFileWrittenBytes);
 
 					// seek to end of file where next block of data will be written
-					fseek(m_tempFile, 0, SEEK_END);
+					_fseeki64(m_tempFile, 0, SEEK_END);
 					// write a blank space for later placement of next file block for this cell
-					if (fwrite(&EmptyOffset, sizeof(boost::uint32_t), 1, m_tempFile) < 1)
+					if (fwrite(&EmptyOffset, sizeof(liblas::detail::TempFileOffsetType), 1, m_tempFile) < 1)
 						return (FileError("Index::PurgePointsToTempFile"));
-					m_tempFileWrittenBytes += sizeof(boost::uint32_t);
+					m_tempFileWrittenBytes += sizeof(liblas::detail::TempFileOffsetType);
 					// write the number of records stored in this section
 					if (fwrite(&RecordsToWrite, sizeof(boost::uint32_t), 1, m_tempFile) < 1)
 						return (FileError("Index::PurgePointsToTempFile"));
@@ -1462,23 +1460,24 @@
 	boost::uint32_t CurCellX, boost::uint32_t CurCellY)
 {
 
-	boost::uint32_t FileOffset, RecordsToRead, FormerNumPts, NewNumPts = 0;
+	boost::uint32_t RecordsToRead, FormerNumPts, NewNumPts = 0;
+	liblas::detail::TempFileOffsetType FileOffset;
 	
 	FormerNumPts = CellBlock->GetNumPoints();
 	CellBlock->SetNumPoints(0);
 	
 	// load the cell as it was written
 	// read the first offset for this cell
-	if (fseek(m_tempFile, (CurCellX * m_cellsY + CurCellY) * sizeof (boost::uint32_t), SEEK_SET))
+	if (_fseeki64(m_tempFile, (CurCellX * m_cellsY + CurCellY) * sizeof (liblas::detail::TempFileOffsetType), SEEK_SET))
 		return (FileError("Index::LoadCellFromTempFile"));
-	if (fread(&FileOffset, sizeof (boost::uint32_t), 1, m_tempFile) < 1)
+	if (fread(&FileOffset, sizeof (liblas::detail::TempFileOffsetType), 1, m_tempFile) < 1)
 		return (FileError("Index::LoadCellFromTempFile"));
 	while (FileOffset > 0)
 	{
 		// jump to the first block for this cell, read the next offset
-		if (fseek(m_tempFile, FileOffset, SEEK_SET))
+		if (_fseeki64(m_tempFile, FileOffset, SEEK_SET))
 			return (FileError("Index::LoadCellFromTempFile"));
-		if (fread(&FileOffset, sizeof (boost::uint32_t), 1, m_tempFile) < 1)
+		if (fread(&FileOffset, sizeof (liblas::detail::TempFileOffsetType), 1, m_tempFile) < 1)
 			return (FileError("Index::LoadCellFromTempFile"));
 		// read the data for the cell in this block
 		// first is the number of items to read now
@@ -1534,7 +1533,7 @@
 {
 	try {
 		Writer writer(*m_ofs, m_idxheader);
-		m_reader->Reset();
+		m_reader->seek(0);
 		while (m_reader->ReadNextPoint())
 		{
 			Point CurPt = m_reader->GetPoint();
@@ -1553,7 +1552,7 @@
 	try {
 		Writer writer(*m_ofs, m_idxheader);
 		/* test block - uncommenting this makes it just like above version with included points
-		m_reader->Reset();
+		m_reader->seek(0);
 		while (m_reader->ReadNextPoint())
 		{
 			Point CurPt = m_reader->GetPoint();


More information about the Liblas-commits mailing list