Advice on loading OpenTopography data into a PostGIS database appreciated
thiemo at gelassene-pferde.biz
thiemo at gelassene-pferde.biz
Thu Oct 31 16:34:47 PDT 2024
Hi
I am having trouble loading OpenTopography data into a PostGIS
installation. This is within my project 33,
https://sourceforge.net/projects/treintaytres/ . You can get the
latest source code from there in case you are interested. My problem is
at revision 32. I want to load OpenTopography raster files into a
PostGIS database using Python 3. I defined the following function.
def process_files(data_directory, file_name_regexp, conn, source_id, logger):
    """Load matching raster files from a directory into TOPO_FILES.

    Only the top level of ``data_directory`` is scanned. Every file whose
    name matches ``file_name_regexp`` (via ``re.match``) is hashed; files
    whose hash is not among those returned by ``get_loaded_files`` are read
    completely and inserted through ``ST_FromGDALRaster``.

    Args:
        data_directory: Directory whose top-level files are considered.
        file_name_regexp: Pattern handed to ``re.match`` for each file name.
        conn: Open database connection; a cursor is created from it.
        source_id: Value stored in SOURCE_ID; cast to ``uuid`` in the SQL.
        logger: Logger receiving the info/debug/error messages.
    """
    loaded_files = get_loaded_files(
        conn=conn,
        source_id=source_id,
        logger=logger,
    )
    existing_hashes = {file_hash for file_hash, _ in loaded_files}
    logger.info(f"Looking into '{data_directory}'")

    statement = """insert into TOPO_FILES (TILE
,FILE_NAME
,FILE_CREATION_PIT
,FILE_HASH
,SOURCE_ID)
values (ST_FromGDALRaster(%s)
,%s
,%s
,%s
,%s::uuid);"""

    # The context manager closes the cursor on every exit path, including
    # when a statement raises or the directory cannot be walked.
    with conn.cursor() as cur:
        cur.execute("set postgis.gdal_enabled_drivers = 'ENABLE_ALL';")
        try:
            # "next" inhibits recursion, so only the top level is retrieved
            root, _dirs, files = next(os.walk(data_directory))
        except StopIteration:
            logger.error(
                f"Error: '{data_directory}' could not be walked. "
                f"Directory might be empty or inaccessible."
            )
            return

        for file_name in files:
            if not re.match(file_name_regexp, file_name):
                continue
            file_path = os.path.join(root, file_name)
            logger.info(f"Processing '{file_path}'")
            file_hash = calculate_file_sha3_512_hash(file_path)
            # NOTE: on Unix, getctime() is the last metadata change time,
            # not the creation time.
            file_creation_time = datetime.fromtimestamp(
                os.path.getctime(file_path)
            )
            # If the hash is already present, skip this file
            # FIXME check on file names
            if file_hash in existing_hashes:
                continue
            # Get the raster data
            with open(file_path, 'rb') as f:
                raster_data = f.read()
            logger.debug("statement")
            logger.debug(statement)
            logger.debug("First 100 bytes of raster_data")
            logger.debug(f"{raster_data[:100]}")
            logger.debug(f"file_name:{file_name}")
            logger.debug(f"file_creation_time:{file_creation_time}")
            logger.debug(f"file_hash:{file_hash}")
            logger.debug(f"source_id:{source_id}")
            # psycopg2.Binary adapts the raw bytes for the bytea parameter
            params = (
                psycopg2.Binary(raster_data),
                file_name,
                file_creation_time,
                file_hash,
                source_id,
            )
            cur.execute(statement, params)
            # NOTE(review): commit placement (per file vs. once after the
            # loop) is not recoverable from the flattened listing; per-file
            # commit is assumed here - confirm against the repository.
            conn.commit()
            # Remember the hash so an identical file later in this same run
            # is skipped as well.
            existing_hashes.add(file_hash)
However, it results in the following error, which I am unable to iron out.
2024-11-01 00:28:33 - root - DEBUG -
source_id:4f68d890-a08c-4c06-8aa5-741ad36b6abe
Traceback (most recent call last):
File
"/home/thiemo/external_projects/svn/33/trunk/code_files/data_storage/load_OpenTopography_data.py", line 737, in
<module>
main()
File
"/home/thiemo/external_projects/svn/33/trunk/code_files/data_storage/load_OpenTopography_data.py", line 714, in
main
process_files(
File
"/home/thiemo/external_projects/svn/33/trunk/code_files/data_storage/load_OpenTopography_data.py", line 442, in
process_files
cur.execute(statement, params)
psycopg2.errors.InternalError_: RASTER_fromGDALRaster: Could not open
bytea with GDAL. Check that the bytea is of a GDAL supported format
I tried a variety of things, among others skipping the conversion to
binary, or an explicit conversion to bytea, i.e.
"ST_FromGDALRaster(%s::bytea)". I have run out of ideas and would very
much appreciate it if someone could nudge me in the right direction.
Kind regards
Thiemo
More information about the postgis-users
mailing list