[postgis-tickets] r15385 - ST_AsMVT parse JSONB into keys/values

bjorn at wololo.org bjorn at wololo.org
Wed May 3 15:32:09 PDT 2017


Author: bjornharrtell
Date: 2017-05-03 15:32:09 -0700 (Wed, 03 May 2017)
New Revision: 15385

Modified:
   trunk/postgis/mvt.c
   trunk/postgis/mvt.h
   trunk/regress/mvt.sql
   trunk/regress/mvt_expected
Log:
ST_AsMVT parse JSONB into keys/values
References #3749

Modified: trunk/postgis/mvt.c
===================================================================
--- trunk/postgis/mvt.c	2017-05-03 13:38:11 UTC (rev 15384)
+++ trunk/postgis/mvt.c	2017-05-03 22:32:09 UTC (rev 15385)
@@ -42,6 +42,12 @@
 	MVT_RING = 3
 };
 
+struct mvt_kv_key {
+	char *name;
+	uint32_t id;
+	UT_hash_handle hh;
+};
+
 struct mvt_kv_string_value {
 	char *string_value;
 	uint32_t id;
@@ -265,15 +271,38 @@
 	return tupdesc;
 }
 
-static void encode_keys(struct mvt_agg_context *ctx)
+static uint32_t get_key_index(struct mvt_agg_context *ctx, char *name)
 {
-	POSTGIS_DEBUG(2, "encode_keys called");
+	struct mvt_kv_key *kv;
+	size_t size = strlen(name);
+	HASH_FIND(hh, ctx->keys_hash, name, size, kv);
+	if (!kv)
+		return -1;
+	return kv->id;
+}
+
+static uint32_t add_key(struct mvt_agg_context *ctx, char *name)
+{
+	struct mvt_kv_key *kv;
+	kv = palloc(sizeof(*kv));
+	size_t size = strlen(name);
+	kv->id = ctx->keys_hash_i++;
+	kv->name = name;
+	HASH_ADD_KEYPTR(hh, ctx->keys_hash, name, size, kv);
+	return kv->id;
+}
+
+static void parse_column_keys(struct mvt_agg_context *ctx)
+{
+	POSTGIS_DEBUG(2, "parse_column_keys called");
 	TupleDesc tupdesc = get_tuple_desc(ctx);
 	int natts = tupdesc->natts;
-	char **keys = palloc(natts * sizeof(*keys));
-	uint32_t i, k = 0;
+	uint32_t i;
 	bool geom_name_found = false;
 	for (i = 0; i < natts; i++) {
+		Oid typoid = getBaseType(tupdesc->attrs[i]->atttypid);
+		if (typoid == JSONBOID)
+			continue;
 		char *tkey = tupdesc->attrs[i]->attname.data;
 		char *key = palloc(strlen(tkey) + 1);
 		strcpy(key, tkey);
@@ -282,15 +311,26 @@
 			geom_name_found = 1;
 			continue;
 		}
-		keys[k++] = key;
+		add_key(ctx, key);
 	}
 	if (!geom_name_found)
-		lwerror("encode_keys: no column '%s' found", ctx->geom_name);
-	ctx->layer->n_keys = k;
-	ctx->layer->keys = keys;
+		lwerror("parse_column_keys: no column '%s' found", ctx->geom_name);
 	ReleaseTupleDesc(tupdesc);
 }
 
+static void encode_keys(struct mvt_agg_context *ctx) {
+	struct mvt_kv_key *kv;
+	size_t n_keys = ctx->keys_hash_i;
+	char **keys = palloc(n_keys * sizeof(*keys));
+	for (kv = ctx->keys_hash; kv != NULL; kv=kv->hh.next) {
+		keys[kv->id] = kv->name;
+	}
+	ctx->layer->n_keys = n_keys;
+	ctx->layer->keys = keys;
+
+	HASH_CLEAR(hh, ctx->keys_hash);
+}
+
 static VectorTile__Tile__Value *create_value() {
 	VectorTile__Tile__Value *value = palloc(sizeof(*value));
 	vector_tile__tile__value__init(value);
@@ -351,24 +391,18 @@
 	if (!kv) { \
 		POSTGIS_DEBUG(4, "MVT_PARSE_VALUE value not found"); \
 		kv = palloc(sizeof(*kv)); \
-		POSTGIS_DEBUGF(4, "MVT_PARSE_VALUE new hash key: %d", ctx->values_hash_i); \
+		POSTGIS_DEBUGF(4, "MVT_PARSE_VALUE new hash key: %d", \
+			ctx->values_hash_i); \
 		kv->id = ctx->values_hash_i++; \
 		kv->valuefield = value; \
 		HASH_ADD(hh, ctx->hash, valuefield, size, kv); \
 	} \
-	tags[c*2] = k - 1; \
-	tags[c*2+1] = kv->id; \
+	tags[ctx->c*2] = k; \
+	tags[ctx->c*2+1] = kv->id; \
 }
 
-#define MVT_PARSE_DATUM(type, kvtype, hash, valuefield, datumfunc, size) \
+#define MVT_PARSE_INT_VALUE(value) \
 { \
-	type value = datumfunc(datum); \
-	MVT_PARSE_VALUE(value, kvtype, hash, valuefield, size); \
-}
-
-#define MVT_PARSE_INT_DATUM(type, datumfunc) \
-{ \
-	type value = datumfunc(datum); \
 	if (value >= 0) { \
 		uint64_t cvalue = value; \
 		MVT_PARSE_VALUE(cvalue, mvt_kv_uint_value, \
@@ -382,52 +416,147 @@
 	} \
 }
 
-static void parse_value_as_string(struct mvt_agg_context *ctx, Oid typoid,
-	Datum datum, uint32_t *tags, uint32_t c, uint32_t k)
+#define MVT_PARSE_DATUM(type, kvtype, hash, valuefield, datumfunc, size) \
+{ \
+	type value = datumfunc(datum); \
+	MVT_PARSE_VALUE(value, kvtype, hash, valuefield, size); \
+}
+
+#define MVT_PARSE_INT_DATUM(type, datumfunc) \
+{ \
+	type value = datumfunc(datum); \
+	MVT_PARSE_INT_VALUE(value); \
+}
+
+static void add_value_as_string(struct mvt_agg_context *ctx,
+	char *value, uint32_t *tags, uint32_t k)
 {
-	POSTGIS_DEBUG(2, "parse_value_as_string called");
+	POSTGIS_DEBUG(2, "add_value_as_string called");
 	struct mvt_kv_string_value *kv;
-	Oid foutoid;
-	bool typisvarlena;
-	getTypeOutputInfo(typoid, &foutoid, &typisvarlena);
-	char *value = OidOutputFunctionCall(foutoid, datum);
-	POSTGIS_DEBUGF(4, "parse_value_as_string value: %s", value);
 	size_t size = strlen(value);
 	HASH_FIND(hh, ctx->string_values_hash, value, size, kv);
 	if (!kv) {
-		POSTGIS_DEBUG(4, "parse_value_as_string value not found");
+		POSTGIS_DEBUG(4, "add_value_as_string value not found");
 		kv = palloc(sizeof(*kv));
-		POSTGIS_DEBUGF(4, "parse_value_as_string new hash key: %d", ctx->values_hash_i);
+		POSTGIS_DEBUGF(4, "add_value_as_string new hash key: %d",
+			ctx->values_hash_i);
 		kv->id = ctx->values_hash_i++;
 		kv->string_value = value;
-		HASH_ADD_KEYPTR(hh, ctx->string_values_hash, kv->string_value, size, kv);
+		HASH_ADD_KEYPTR(hh, ctx->string_values_hash, kv->string_value,
+			size, kv);
 	}
-	tags[c*2] = k - 1;
-	tags[c*2+1] = kv->id;
+	tags[ctx->c*2] = k;
+	tags[ctx->c*2+1] = kv->id;
 }
 
+static void parse_datum_as_string(struct mvt_agg_context *ctx, Oid typoid,
+	Datum datum, uint32_t *tags, uint32_t k)
+{
+	POSTGIS_DEBUG(2, "parse_value_as_string called");
+	struct mvt_kv_string_value *kv;
+	Oid foutoid;
+	bool typisvarlena;
+	getTypeOutputInfo(typoid, &foutoid, &typisvarlena);
+	char *value = OidOutputFunctionCall(foutoid, datum);
+	POSTGIS_DEBUGF(4, "parse_value_as_string value: %s", value);
+	add_value_as_string(ctx, value, tags, k);
+}
+
+static uint32_t *parse_jsonb(struct mvt_agg_context *ctx, Jsonb *jb,
+	uint32_t *tags)
+{
+	JsonbIterator *it;
+	JsonbValue v;
+	bool skipNested = false;
+	JsonbIteratorToken r;
+	uint32_t k;
+
+	if (!JB_ROOT_IS_OBJECT(jb))
+		return tags;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE) {
+		skipNested = true;
+
+		if (r == WJB_KEY && v.type != jbvNull) {
+			char *key;
+			key = palloc(v.val.string.len + 1 * sizeof(char));
+			memcpy(key, v.val.string.val, v.val.string.len);
+			key[v.val.string.len] = '\0';
+
+			k = get_key_index(ctx, key);
+			if (k == -1) {
+				uint32_t newSize = ctx->keys_hash_i + 1;
+				tags = repalloc(tags, newSize * 2 * sizeof(*tags));
+				k = add_key(ctx, key);
+			}
+
+			r = JsonbIteratorNext(&it, &v, skipNested);
+
+			if (v.type == jbvString) {
+				char *value;
+				value = palloc(v.val.string.len + 1 * sizeof(char));
+				memcpy(value, v.val.string.val, v.val.string.len);
+				value[v.val.string.len] = '\0';
+				add_value_as_string(ctx, value, tags, k);
+				ctx->c++;
+			} else if (v.type == jbvBool) {
+				MVT_PARSE_VALUE(v.val.boolean, mvt_kv_bool_value,
+					bool_values_hash, bool_value, sizeof(protobuf_c_boolean));
+				ctx->c++;
+			} else if (v.type == jbvNumeric) {
+				char *str;
+				str = DatumGetCString(DirectFunctionCall1(numeric_out,
+					PointerGetDatum(v.val.numeric)));
+				double d = strtod(str, NULL);
+				long l = strtol(str, NULL, 10);
+				if ((long) d != l) {
+					MVT_PARSE_VALUE(d, mvt_kv_double_value, double_values_hash,
+						double_value, sizeof(double));
+				} else {
+					MVT_PARSE_INT_VALUE(l);
+				}
+				ctx->c++;
+			}
+		}
+	}
+
+	return tags;
+}
+
 static void parse_values(struct mvt_agg_context *ctx)
 {
 	POSTGIS_DEBUG(2, "parse_values called");
-	uint32_t n_keys = ctx->layer->n_keys;
+	uint32_t n_keys = ctx->keys_hash_i;
 	uint32_t *tags = palloc(n_keys * 2 * sizeof(*tags));
 	bool isnull;
-	uint32_t i, k = 0, c = 0;
+	uint32_t i, k;
 	TupleDesc tupdesc = get_tuple_desc(ctx);
 	int natts = tupdesc->natts;
+	ctx->c = 0;
 
 	POSTGIS_DEBUGF(3, "parse_values natts: %d", natts);
 
 	for (i = 0; i < natts; i++) {
 		if (i == ctx->geom_index)
 			continue;
-		k++;
+
+		char *key = tupdesc->attrs[i]->attname.data;
 		Datum datum = GetAttributeByNum(ctx->row, i+1, &isnull);
+		Oid typoid = getBaseType(tupdesc->attrs[i]->atttypid);
+		k = get_key_index(ctx, key);
+		if (k == -1 && typoid != JSONBOID)
+			lwerror("parse_values: unexpectedly could not find parsed key name",
+				key);
 		if (isnull) {
 			POSTGIS_DEBUG(3, "parse_values isnull detected");
 			continue;
 		}
-		Oid typoid = getBaseType(tupdesc->attrs[i]->atttypid);
+		if (typoid == JSONBOID) {
+			tags = parse_jsonb(ctx, DatumGetJsonb(datum), tags);
+			continue;
+		}
 		switch (typoid) {
 		case BOOLOID:
 			MVT_PARSE_DATUM(protobuf_c_boolean, mvt_kv_bool_value,
@@ -454,20 +583,19 @@
 				DatumGetFloat8, sizeof(double));
 			break;
 		default:
-			parse_value_as_string(ctx, typoid, datum, tags, c, k);
+			parse_datum_as_string(ctx, typoid, datum, tags, k);
 			break;
 		}
-		c++;
+		ctx->c++;
 	}
 
 	ReleaseTupleDesc(tupdesc);
 
-	ctx->feature->n_tags = c * 2;
+	ctx->feature->n_tags = ctx->c * 2;
 	ctx->feature->tags = tags;
 
 	POSTGIS_DEBUGF(3, "parse_values n_tags %d", ctx->feature->n_tags);
 }
-
 static int max_type(LWCOLLECTION *lwcoll)
 {
 	int i, max = POINTTYPE;
@@ -585,6 +713,7 @@
 		lwerror("mvt_agg_init_context: extent cannot be 0");
 
 	ctx->features_capacity = FEATURES_CAPACITY_INITIAL;
+	ctx->keys_hash = NULL;
 	ctx->string_values_hash = NULL;
 	ctx->float_values_hash = NULL;
 	ctx->double_values_hash = NULL;
@@ -592,6 +721,7 @@
 	ctx->sint_values_hash = NULL;
 	ctx->bool_values_hash = NULL;
 	ctx->values_hash_i = 0;
+	ctx->keys_hash_i = 0;
 
 	layer = palloc(sizeof(*layer));
 	vector_tile__tile__layer__init(layer);
@@ -631,7 +761,7 @@
 
 	ctx->feature = feature;
 	if (layer->n_features == 0)
-		encode_keys(ctx);
+		parse_column_keys(ctx);
 
 	bool isnull;
 	Datum datum = GetAttributeByNum(ctx->row, ctx->geom_index + 1, &isnull);
@@ -640,7 +770,8 @@
 	GSERIALIZED *gs = (GSERIALIZED *) PG_DETOAST_DATUM(datum);
 	LWGEOM *lwgeom = lwgeom_from_gserialized(gs);
 
-	POSTGIS_DEBUGF(3, "mvt_agg_transfn encoded feature count: %d", layer->n_features);
+	POSTGIS_DEBUGF(3, "mvt_agg_transfn encoded feature count: %d",
+		layer->n_features);
 	layer->features[layer->n_features++] = feature;
 
 	encode_geometry(ctx, lwgeom);
@@ -659,6 +790,7 @@
 {
 	POSTGIS_DEBUG(2, "mvt_agg_finalfn called");
 
+	encode_keys(ctx);
 	encode_values(ctx);
 
 	VectorTile__Tile__Layer *layers[1];

Modified: trunk/postgis/mvt.h
===================================================================
--- trunk/postgis/mvt.h	2017-05-03 13:38:11 UTC (rev 15384)
+++ trunk/postgis/mvt.h	2017-05-03 22:32:09 UTC (rev 15385)
@@ -31,6 +31,7 @@
 #include "utils/array.h"
 #include "utils/typcache.h"
 #include "utils/lsyscache.h"
+#include "utils/jsonb.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
 #include "access/htup_details.h"
@@ -54,6 +55,7 @@
 	VectorTile__Tile__Feature *feature;
 	VectorTile__Tile__Layer *layer;
 	size_t features_capacity;
+	struct mvt_kv_key *keys_hash;
 	struct mvt_kv_string_value *string_values_hash;
 	struct mvt_kv_float_value *float_values_hash;
 	struct mvt_kv_double_value *double_values_hash;
@@ -61,6 +63,8 @@
 	struct mvt_kv_sint_value *sint_values_hash;
 	struct mvt_kv_bool_value *bool_values_hash;
 	uint32_t values_hash_i;
+	uint32_t keys_hash_i;
+	uint32_t c;
 } ;
 
 LWGEOM *mvt_geom(LWGEOM *geom, GBOX *bounds, uint32_t extent, uint32_t buffer,

Modified: trunk/regress/mvt.sql
===================================================================
--- trunk/regress/mvt.sql	2017-05-03 13:38:11 UTC (rev 15384)
+++ trunk/regress/mvt.sql	2017-05-03 22:32:09 UTC (rev 15385)
@@ -109,6 +109,12 @@
     UNION
     SELECT 2::int AS c1, ST_AsMVTGeom(ST_GeomFromText('POINT(26 18)'),
     ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom) AS q;
+SELECT 'TA9', encode(ST_AsMVT('test', 4096, 'geom', q), 'base64') FROM (SELECT '{"c1":1,"c2":"abcd"}'::jsonb,
+    ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
+    ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom) AS q;
+SELECT 'TA10', encode(ST_AsMVT('test', 4096, 'geom', q), 'base64') FROM (SELECT '{"c1":"abcd", "c2":"abcd"}'::jsonb,
+    ST_AsMVTGeom(ST_GeomFromText('POINT(25 17)'),
+    ST_MakeBox2D(ST_Point(0, 0), ST_Point(4096, 4096)), 4096, 0, false) AS geom) AS q;
 
 -- unsupported input
 SELECT 'TU2';

Modified: trunk/regress/mvt_expected
===================================================================
--- trunk/regress/mvt_expected	2017-05-03 13:38:11 UTC (rev 15384)
+++ trunk/regress/mvt_expected	2017-05-03 22:32:09 UTC (rev 15385)
@@ -28,6 +28,8 @@
 CW90aGVydGVzdCIGCgR0ZXN0KIAgeAI=
 TA8|GkEKBHRlc3QSDBICAAAYASIECTLePxIMEgIAABgBIgQJNNw/EgwSAgABGAEiBAk03D8aAmMxIgIo
 ASICKAIogCB4Ag==
+TA9|Gi8KBHRlc3QSDhIEAAABARgBIgQJMt4/GgJjMRoCYzIiAigBIgYKBGFiY2QogCB4Ag==
+TA10|GisKBHRlc3QSDhIEAAABABgBIgQJMt4/GgJjMRoCYzIiBgoEYWJjZCiAIHgC
 TU2
 ERROR:  pgis_asmvt_transfn: parameter row cannot be other than a rowtype
 TU3



More information about the postgis-tickets mailing list