[postgis-tickets] r16685 - Speedup MVT column parsing

Raul raul at rmr.ninja
Fri Aug 10 08:57:40 PDT 2018


Author: algunenano
Date: 2018-08-10 08:57:40 -0700 (Fri, 10 Aug 2018)
New Revision: 16685

Modified:
   trunk/NEWS
   trunk/postgis/mvt.c
   trunk/postgis/mvt.h
   trunk/regress/mvt.sql
   trunk/regress/mvt_expected
   trunk/regress/mvt_jsonb.sql
   trunk/regress/mvt_jsonb_expected
Log:
Speedup MVT column parsing

Closes #4145
Closes https://github.com/postgis/postgis/pull/286


Modified: trunk/NEWS
===================================================================
--- trunk/NEWS	2018-08-10 11:33:36 UTC (rev 16684)
+++ trunk/NEWS	2018-08-10 15:57:40 UTC (rev 16685)
@@ -11,6 +11,7 @@
   - #4140, Use user-provided CFLAGS in address standardizer and the
     topology module (Raúl Marín)
   - #4143, Fix backend crash when ST_OffsetCurve fails (Dan Baston)
+  - #4145, Speedup MVT column parsing (Raúl Marín)
 
   See PostGIS 2.5.0 section for full details
 

Modified: trunk/postgis/mvt.c
===================================================================
--- trunk/postgis/mvt.c	2018-08-10 11:33:36 UTC (rev 16684)
+++ trunk/postgis/mvt.c	2018-08-10 15:57:40 UTC (rev 16685)
@@ -295,10 +295,9 @@
 	return tupdesc;
 }
 
-static uint32_t get_key_index(mvt_agg_context *ctx, char *name)
+static uint32_t get_key_index_with_size(mvt_agg_context *ctx, const char *name, size_t size)
 {
 	struct mvt_kv_key *kv;
-	size_t size = strlen(name);
 	HASH_FIND(hh, ctx->keys_hash, name, size, kv);
 	if (!kv)
 		return UINT32_MAX;
@@ -318,27 +317,38 @@
 
 static void parse_column_keys(mvt_agg_context *ctx)
 {
-	TupleDesc tupdesc = get_tuple_desc(ctx);
-	uint32_t natts = (uint32_t) tupdesc->natts;
-	uint32_t i;
+	uint32_t i, natts;
 	bool geom_found = false;
-	char *key;
+
 	POSTGIS_DEBUG(2, "parse_column_keys called");
 
+	ctx->column_cache.tupdesc = get_tuple_desc(ctx);
+	natts = ctx->column_cache.tupdesc->natts;
+
+	ctx->column_cache.column_keys_index = palloc(sizeof(uint32_t) * natts);
+	ctx->column_cache.column_oid = palloc(sizeof(uint32_t) * natts);
+	ctx->column_cache.values = palloc(sizeof(Datum) * natts);
+	ctx->column_cache.nulls = palloc(sizeof(bool) * natts);
+
 	for (i = 0; i < natts; i++)
 	{
 #if POSTGIS_PGSQL_VERSION < 110
-		Oid typoid = getBaseType(tupdesc->attrs[i]->atttypid);
-		char *tkey = tupdesc->attrs[i]->attname.data;
+		Oid typoid = getBaseType(ctx->column_cache.tupdesc->attrs[i]->atttypid);
+		char *tkey = ctx->column_cache.tupdesc->attrs[i]->attname.data;
 #else
-		Oid typoid = getBaseType(tupdesc->attrs[i].atttypid);
-		char *tkey = tupdesc->attrs[i].attname.data;
+		Oid typoid = getBaseType(ctx->column_cache.tupdesc->attrs[i].atttypid);
+		char *tkey = ctx->column_cache.tupdesc->attrs[i].attname.data;
 #endif
+
+		ctx->column_cache.column_oid[i] = typoid;
 #if POSTGIS_PGSQL_VERSION >= 94
 		if (typoid == JSONBOID)
+		{
+			ctx->column_cache.column_keys_index[i] = UINT32_MAX;
 			continue;
+		}
 #endif
-		key = pstrdup(tkey);
+
 		if (ctx->geom_name == NULL)
 		{
 			if (!geom_found && typoid == TypenameGetTypid("geometry"))
@@ -350,7 +360,7 @@
 		}
 		else
 		{
-			if (!geom_found && strcmp(key, ctx->geom_name) == 0)
+			if (!geom_found && strcmp(tkey, ctx->geom_name) == 0)
 			{
 				ctx->geom_index = i;
 				geom_found = true;
@@ -357,11 +367,12 @@
 				continue;
 			}
 		}
-		add_key(ctx, key);
+
+		ctx->column_cache.column_keys_index[i] = add_key(ctx, pstrdup(tkey));
 	}
+
 	if (!geom_found)
 		elog(ERROR, "parse_column_keys: no geometry column found");
-	ReleaseTupleDesc(tupdesc);
 }
 
 static void encode_keys(mvt_agg_context *ctx)
@@ -389,7 +400,8 @@
 	POSTGIS_DEBUG(2, "MVT_CREATE_VALUES called"); \
 	{ \
 		struct kvtype *kv; \
-		for (kv = ctx->hash; kv != NULL; kv=kv->hh.next) { \
+		for (kv = ctx->hash; kv != NULL; kv=kv->hh.next) \
+		{ \
 			VectorTile__Tile__Value *value = create_value(); \
 			value->hasfield = 1; \
 			value->valuefield = kv->valuefield; \
@@ -433,6 +445,14 @@
 	HASH_CLEAR(hh, ctx->uint_values_hash);
 	HASH_CLEAR(hh, ctx->sint_values_hash);
 	HASH_CLEAR(hh, ctx->bool_values_hash);
+
+	pfree(ctx->column_cache.column_keys_index);
+	pfree(ctx->column_cache.column_oid);
+	pfree(ctx->column_cache.values);
+	pfree(ctx->column_cache.nulls);
+	ReleaseTupleDesc(ctx->column_cache.tupdesc);
+	memset(&ctx->column_cache, 0, sizeof(ctx->column_cache));
+
 }
 
 #define MVT_PARSE_VALUE(value, kvtype, hash, valuefield, size) \
@@ -486,11 +506,10 @@
 	MVT_PARSE_INT_VALUE(value); \
 }
 
-static void add_value_as_string(mvt_agg_context *ctx,
-	char *value, uint32_t *tags, uint32_t k)
+static void add_value_as_string_with_size(mvt_agg_context *ctx,
+	char *value, size_t size, uint32_t *tags, uint32_t k)
 {
 	struct mvt_kv_string_value *kv;
-	size_t size = strlen(value);
 	POSTGIS_DEBUG(2, "add_value_as_string called");
 	HASH_FIND(hh, ctx->string_values_hash, value, size, kv);
 	if (!kv)
@@ -508,6 +527,12 @@
 	tags[ctx->row_columns*2+1] = kv->id;
 }
 
+static void add_value_as_string(mvt_agg_context *ctx,
+	char *value, uint32_t *tags, uint32_t k)
+{
+	return add_value_as_string_with_size(ctx, value, strlen(value), tags, k);
+}
+
 static void parse_datum_as_string(mvt_agg_context *ctx, Oid typoid,
 	Datum datum, uint32_t *tags, uint32_t k)
 {
@@ -542,15 +567,17 @@
 
 		if (r == WJB_KEY && v.type != jbvNull)
 		{
-			char *key;
-			key = palloc(v.val.string.len + 1 * sizeof(char));
-			memcpy(key, v.val.string.val, v.val.string.len);
-			key[v.val.string.len] = '\0';
 
-			k = get_key_index(ctx, key);
+			k = get_key_index_with_size(ctx, v.val.string.val, v.val.string.len);
 			if (k == UINT32_MAX)
 			{
+				char *key;
 				uint32_t newSize = ctx->keys_hash_i + 1;
+
+				key = palloc(v.val.string.len + 1);
+				memcpy(key, v.val.string.val, v.val.string.len);
+				key[v.val.string.len] = '\0';
+
 				tags = repalloc(tags, newSize * 2 * sizeof(*tags));
 				k = add_key(ctx, key);
 			}
@@ -560,7 +587,7 @@
 			if (v.type == jbvString)
 			{
 				char *value;
-				value = palloc(v.val.string.len + 1 * sizeof(char));
+				value = palloc(v.val.string.len + 1);
 				memcpy(value, v.val.string.val, v.val.string.len);
 				value[v.val.string.len] = '\0';
 				add_value_as_string(ctx, value, tags, k);
@@ -603,13 +630,24 @@
 {
 	uint32_t n_keys = ctx->keys_hash_i;
 	uint32_t *tags = palloc(n_keys * 2 * sizeof(*tags));
-	bool isnull;
-	uint32_t i, k;
-	TupleDesc tupdesc = get_tuple_desc(ctx);
-	uint32_t natts = (uint32_t) tupdesc->natts;
+	uint32_t i;
+	mvt_column_cache cc = ctx->column_cache;
+	uint32_t natts = (uint32_t) cc.tupdesc->natts;
+
+	HeapTupleData tuple;
+
+	POSTGIS_DEBUG(2, "parse_values called");
 	ctx->row_columns = 0;
-	POSTGIS_DEBUG(2, "parse_values called");
 
+	/* Build a temporary HeapTuple control structure */
+	tuple.t_len = HeapTupleHeaderGetDatumLength(ctx->row);
+	ItemPointerSetInvalid(&(tuple.t_self));
+	tuple.t_tableOid = InvalidOid;
+	tuple.t_data = ctx->row;
+
+	/* We use heap_deform_tuple as it costs only O(N) vs O(N^2) of GetAttributeByNum */
+	heap_deform_tuple(&tuple, cc.tupdesc, cc.values, cc.nulls);
+
 	POSTGIS_DEBUGF(3, "parse_values natts: %d", natts);
 
 	for (i = 0; i < natts; i++)
@@ -616,25 +654,26 @@
 	{
 		char *key;
 		Oid typoid;
-		Datum datum;
+		uint32_t k;
+		Datum datum = cc.values[i];
 
 		if (i == ctx->geom_index)
 			continue;
 
-#if POSTGIS_PGSQL_VERSION < 110
-		key = tupdesc->attrs[i]->attname.data;
-		typoid = getBaseType(tupdesc->attrs[i]->atttypid);
-#else
-		key = tupdesc->attrs[i].attname.data;
-		typoid = getBaseType(tupdesc->attrs[i].atttypid);
-#endif
-		datum = GetAttributeByNum(ctx->row, i+1, &isnull);
-		k = get_key_index(ctx, key);
-		if (isnull)
+		if (cc.nulls[i])
 		{
 			POSTGIS_DEBUG(3, "parse_values isnull detected");
 			continue;
 		}
+
+#if POSTGIS_PGSQL_VERSION < 110
+		key = cc.tupdesc->attrs[i]->attname.data;
+#else
+		key = cc.tupdesc->attrs[i].attname.data;
+#endif
+		k = cc.column_keys_index[i];
+		typoid = cc.column_oid[i];
+
 #if POSTGIS_PGSQL_VERSION >= 94
 		if (k == UINT32_MAX && typoid != JSONBOID)
 			elog(ERROR, "parse_values: unexpectedly could not find parsed key name '%s'", key);
@@ -644,7 +683,7 @@
 			continue;
 		}
 #else
-		if (k == -1)
+		if (k == UINT32_MAX)
 			elog(ERROR, "parse_values: unexpectedly could not find parsed key name '%s'", key);
 #endif
 
@@ -681,7 +720,6 @@
 		ctx->row_columns++;
 	}
 
-	ReleaseTupleDesc(tupdesc);
 
 	ctx->feature->n_tags = ctx->row_columns * 2;
 	ctx->feature->tags = tags;
@@ -857,6 +895,8 @@
 	ctx->values_hash_i = 0;
 	ctx->keys_hash_i = 0;
 
+	memset(&ctx->column_cache, 0, sizeof(ctx->column_cache));
+
 	layer = palloc(sizeof(*layer));
 	vector_tile__tile__layer__init(layer);
 	layer->version = 2;

Modified: trunk/postgis/mvt.h
===================================================================
--- trunk/postgis/mvt.h	2018-08-10 11:33:36 UTC (rev 16684)
+++ trunk/postgis/mvt.h	2018-08-10 15:57:40 UTC (rev 16685)
@@ -46,6 +46,15 @@
 
 #include "vector_tile.pb-c.h"
 
+typedef struct mvt_column_cache
+{
+    uint32_t *column_keys_index;
+    uint32_t *column_oid;
+    Datum *values;
+    bool *nulls;
+    TupleDesc tupdesc;
+} mvt_column_cache;
+
 typedef struct mvt_agg_context
 {
 	char *name;
@@ -67,6 +76,7 @@
 	uint32_t values_hash_i;
 	uint32_t keys_hash_i;
 	uint32_t row_columns;
+	mvt_column_cache column_cache;
 } mvt_agg_context;
 
 /* Prototypes */

Modified: trunk/regress/mvt.sql
===================================================================
--- trunk/regress/mvt.sql	2018-08-10 11:33:36 UTC (rev 16684)
+++ trunk/regress/mvt.sql	2018-08-10 15:57:40 UTC (rev 16685)
@@ -343,6 +343,48 @@
 	) AS geom
 ) AS q;
 
+-- Strings and text
+SELECT 'TA11', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
+	SELECT 'AbcDfg'::varchar AS cstring,
+	       'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus sed nulla augue. Pellentesque ut vulputate ex. Nunc et odio placerat, lacinia lectus sed, fermentum sapien. Sed massa velit, ullamcorper et est quis, congue rhoncus orci. Suspendisse in ante varius, convallis enim ut, fermentum amet.'::text as ctext,
+	       ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
+		ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
+) AS q;
+
+
+-- Check null attributes
+SELECT 'TA12', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
+	SELECT 1::int AS c1, NULL::double precision AS c2, ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
+		ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
+	UNION
+	SELECT NULL AS c1, 2.0 AS c2, ST_AsMVTGeom(ST_GeomFromText('POINT(26 18)'),
+		ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
+) AS q;
+
+SELECT 'TA13', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
+	SELECT 1::int AS c1, NULL::double precision AS c2, ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
+		ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
+	UNION
+	SELECT 5 AS c1, 2.0 AS c2, null AS geom
+) AS q;
+
+-- Extra geometry as parameter (casted as string)
+SELECT 'TA14', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM
+(
+	SELECT geom, St_Expand(geom, 10) as other_geom FROM
+	(
+		SELECT 'SRID=3857;MULTILINESTRING((105209.784484008 5267849.91657293,102374.204885822 5266414.05020624,99717.9874419115 5267379.35282178,90157.5689699989 5266091.78724987,86186.0622890498 5271349.34154337,78713.0972659854 5272871.78773217,76281.8581230672 5277951.00736649,81783.372341432 5289800.59747023))'::geometry as geom
+	) _sq
+) AS q;
+
+-- Numeric: Currently being casted as strings
+SELECT 'TA15', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM
+(
+	SELECT 1::numeric AS c1, '12.232389283223239'::numeric AS c2,
+	       '1' AS cstring, ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
+			ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
+) AS q;
+
 -- default values tests
 SELECT 'D1', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT 1 AS c1, 'abcd'::text AS c2,
 	ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),

Modified: trunk/regress/mvt_expected
===================================================================
--- trunk/regress/mvt_expected	2018-08-10 11:33:36 UTC (rev 16684)
+++ trunk/regress/mvt_expected	2018-08-10 15:57:40 UTC (rev 16685)
@@ -68,6 +68,23 @@
 AmMxGgJjMiICKAEiAigCIgIoAyiAIHgC
 TA9|0
 TA10|49
+TA11|GucCCgR0ZXN0Eg4SBAAAAQEYASIECTLePxoHY3N0cmluZxoFY3RleHQiCAoGQWJjRGZnIq8CCqwC
+TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdC4g
+UGhhc2VsbHVzIHNlZCBudWxsYSBhdWd1ZS4gUGVsbGVudGVzcXVlIHV0IHZ1bHB1dGF0ZSBleC4g
+TnVuYyBldCBvZGlvIHBsYWNlcmF0LCBsYWNpbmlhIGxlY3R1cyBzZWQsIGZlcm1lbnR1bSBzYXBp
+ZW4uIFNlZCBtYXNzYSB2ZWxpdCwgdWxsYW1jb3JwZXIgZXQgZXN0IHF1aXMsIGNvbmd1ZSByaG9u
+Y3VzIG9yY2kuIFN1c3BlbmRpc3NlIGluIGFudGUgdmFyaXVzLCBjb252YWxsaXMgZW5pbSB1dCwg
+ZmVybWVudHVtIGFtZXQuKIAgeAI=
+TA12|Gj4KBHRlc3QSDBICAAAYASIECTLePxIMEgIBARgBIgQJNNw/GgJjMRoCYzIiAigBIgkZAAAAAAAA
+AEAogCB4Ag==
+TA13|GiUKBHRlc3QSDBICAAAYASIECTLePxoCYzEaAmMyIgIoASiAIHgC
+TA14|GpACCgR0ZXN0Ei8SAgAAGAIiJwny6wyShoMFOqUstRbBKYoPr5UBjxSFPpRS4XTkF/8lsE/8VZK5
+ARoKb3RoZXJfZ2VvbSLFAQrCATAxMDMwMDAwMjAxMTBGMDAwMDAxMDAwMDAwMDUwMDAwMDBEOTQw
+REZCQUZEOUVGMjQwNDc0RDYyNzJBODE2NTQ0MUQ5NDBERkJBRkQ5RUYyNDBDN0YzM0NBNkQ0MkQ1
+NDQxNkExQTNGOEQzQ0IwRjk0MEM3RjMzQ0E2RDQyRDU0NDE2QTFBM0Y4RDNDQjBGOTQwNDc0RDYy
+NzJBODE2NTQ0MUQ5NDBERkJBRkQ5RUYyNDA0NzRENjI3MkE4MTY1NDQxKIAgeAI=
+TA15|GkkKBHRlc3QSEBIGAAABAQIAGAEiBAky3j8aAmMxGgJjMhoHY3N0cmluZyIDCgExIhQKEjEyLjIz
+MjM4OTI4MzIyMzIzOSiAIHgC
 D1|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
 D2|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
 D3|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==

Modified: trunk/regress/mvt_jsonb.sql
===================================================================
--- trunk/regress/mvt_jsonb.sql	2018-08-10 11:33:36 UTC (rev 16684)
+++ trunk/regress/mvt_jsonb.sql	2018-08-10 15:57:40 UTC (rev 16685)
@@ -1,6 +1,13 @@
-SELECT 'TA9', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":1,"c2":"abcd"}'::jsonb,
+SELECT 'J1', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":1,"c2":"abcd"}'::jsonb,
     ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
     ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom) AS q;
-SELECT 'TA10', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":"abcd", "c2":"abcd"}'::jsonb,
+SELECT 'J2', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (SELECT '{"c1":"abcd", "c2":"abcd"}'::jsonb,
     ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
     ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom) AS q;
+SELECT 'J3', encode(ST_AsMVT(q, 'test', 4096, 'geom'), 'base64') FROM (
+    SELECT '{"c1":"abcd", "c2":"abcd"}'::jsonb,
+            ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'), ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
+    UNION ALL
+    SELECT '{"c3":"abasdadcd", "c1":5}'::jsonb,
+            ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'), ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom
+) AS q;

Modified: trunk/regress/mvt_jsonb_expected
===================================================================
--- trunk/regress/mvt_jsonb_expected	2018-08-10 11:33:36 UTC (rev 16684)
+++ trunk/regress/mvt_jsonb_expected	2018-08-10 15:57:40 UTC (rev 16685)
@@ -1,2 +1,4 @@
-TA9|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
-TA10|GisKBHRlc3QSDhIEAAABABgBIgQJMt4/GgJjMRoCYzIiBgoEYWJjZCiAIHgC
+J1|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
+J2|GisKBHRlc3QSDhIEAAABABgBIgQJMt4/GgJjMRoCYzIiBgoEYWJjZCiAIHgC
+J3|GlAKBHRlc3QSDhIEAAABABgBIgQJMt4/Eg4SBAABAgIYASIECTLePxoCYzEaAmMyGgJjMyIGCgRh
+YmNkIgIoBSILCglhYmFzZGFkY2QogCB4Ag==



More information about the postgis-tickets mailing list