[postgis-tickets] r17446 - Fix selectivity issue with support functions, wherein... default

Paul Ramsey pramsey at cleverelephant.ca
Mon May 27 02:28:06 PDT 2019


Author: pramsey
Date: 2019-05-27 14:28:06 -0700 (Mon, 27 May 2019)
New Revision: 17446

Modified:
   trunk/postgis/geography_centroid.c
   trunk/postgis/geography_inout.c
   trunk/postgis/gserialized_estimate.c
   trunk/postgis/gserialized_supportfn.c
Log:
Fix selectivity issue with support functions, wherein... default 
selectivities were the only thing we were returning.
Closes #4404


Modified: trunk/postgis/geography_centroid.c
===================================================================
--- trunk/postgis/geography_centroid.c	2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/geography_centroid.c	2019-05-27 21:28:06 UTC (rev 17446)
@@ -60,7 +60,6 @@
 	int32_t srid;
 	bool use_spheroid = true;
 	SPHEROID s;
-	uint32_t type;
 
 	/* Get our geometry object loaded into memory. */
 	g = PG_GETARG_GSERIALIZED_P(0);
@@ -91,9 +90,7 @@
 	if ( ! use_spheroid )
 		s.a = s.b = s.radius;
 
-	type = gserialized_get_type(g);
-
-	switch (type)
+	switch (lwgeom_get_type(lwgeom))
 	{
 
 	case POINTTYPE:

Modified: trunk/postgis/geography_inout.c
===================================================================
--- trunk/postgis/geography_inout.c	2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/geography_inout.c	2019-05-27 21:28:06 UTC (rev 17446)
@@ -536,14 +536,12 @@
 PG_FUNCTION_INFO_V1(geography_from_geometry);
 Datum geography_from_geometry(PG_FUNCTION_ARGS)
 {
+	GSERIALIZED *g_ser = NULL;
 	GSERIALIZED *geom = PG_GETARG_GSERIALIZED_P_COPY(0);
-	LWGEOM *lwgeom = NULL;
-	GSERIALIZED *g_ser = NULL;
+	LWGEOM *lwgeom = lwgeom_from_gserialized(geom);
 
-	geography_valid_type(gserialized_get_type(geom));
+	geography_valid_type(lwgeom_get_type(lwgeom));
 
-	lwgeom = lwgeom_from_gserialized(geom);
-
 	/* Force default SRID */
 	if ( (int)lwgeom->srid <= 0 )
 	{

Modified: trunk/postgis/gserialized_estimate.c
===================================================================
--- trunk/postgis/gserialized_estimate.c	2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/gserialized_estimate.c	2019-05-27 21:28:06 UTC (rev 17446)
@@ -143,7 +143,11 @@
 static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
 static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
 
+/* Other prototypes */
+float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
+float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
 
+
 /* Old Prototype */
 Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
 
@@ -1259,49 +1263,25 @@
 	));
 }
 
-/**
-* Join selectivity of the && operator. The selectivity
-* is the ratio of the number of rows we think will be
-* returned divided the maximum number of rows the join
-* could possibly return (the full combinatoric join).
-*
-* joinsel = estimated_nrows / (totalrows1 * totalrows2)
-*/
-PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
-Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
+double
+gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
 {
-	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-	/* Oid operator = PG_GETARG_OID(1); */
-	List *args = (List *) PG_GETARG_POINTER(2);
-	JoinType jointype = (JoinType) PG_GETARG_INT16(3);
-	int mode = PG_GETARG_INT32(4);
-
-	Node *arg1, *arg2;
-	Var *var1, *var2;
+	float8 selectivity;
 	Oid relid1, relid2;
-
 	ND_STATS *stats1, *stats2;
-	float8 selectivity;
+	Node *arg1 = (Node*) linitial(args);
+	Node *arg2 = (Node*) lsecond(args);
+	Var *var1 = (Var*) arg1;
+	Var *var2 = (Var*) arg2;
 
-	/* Only respond to an inner join/unknown context join */
-	if (jointype != JOIN_INNER)
-	{
-		elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
-		PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
-	}
+	elog(DEBUG2, "%s: entered function", __func__);
 
-	/* Find Oids of the geometry columns we are working with */
-	arg1 = (Node*) linitial(args);
-	arg2 = (Node*) lsecond(args);
-	var1 = (Var*) arg1;
-	var2 = (Var*) arg2;
-
 	/* We only do column joins right now, no functional joins */
 	/* TODO: handle g1 && ST_Expand(g2) */
 	if (!IsA(arg1, Var) || !IsA(arg2, Var))
 	{
 		elog(DEBUG1, "%s called with arguments that are not column references", __func__);
-		PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+		return DEFAULT_ND_JOINSEL;
 	}
 
 	/* What are the Oids of our tables/relations? */
@@ -1308,36 +1288,68 @@
 	relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
 	relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
 
-	POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
-	                 get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
-
 	/* Pull the stats from the stats system. */
 	stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
 	stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
 
 	/* If we can't get stats, we have to stop here! */
-	if ( ! stats1 )
+	if (!stats1)
 	{
-		POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) : "NULL" , relid1);
-		PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+		elog(DEBUG2, "%s: cannot find stats for \"%s\"",  __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
+		return DEFAULT_ND_JOINSEL;
 	}
-	else if ( ! stats2 )
+	else if (!stats2)
 	{
-		POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
-		PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+		elog(DEBUG2, "%s: cannot find stats for \"%s\"",  __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
+		return DEFAULT_ND_JOINSEL;
 	}
 
 	selectivity = estimate_join_selectivity(stats1, stats2);
-	POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
-
+	elog(DEBUG2, "got selectivity %g", selectivity);
 	pfree(stats1);
 	pfree(stats2);
-	PG_RETURN_FLOAT8(selectivity);
+	return selectivity;
 }
 
+/**
+* Join selectivity of the && operator. The selectivity
+* is the ratio of the number of rows we think will be
+* returned divided the maximum number of rows the join
+* could possibly return (the full combinatoric join).
+*
+* joinsel = estimated_nrows / (totalrows1 * totalrows2)
+*/
+PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
+Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	/* Oid operator = PG_GETARG_OID(1); */
+	List *args = (List *) PG_GETARG_POINTER(2);
+	JoinType jointype = (JoinType) PG_GETARG_INT16(3);
+	int mode = PG_GETARG_INT32(4);
 
+	elog(DEBUG2, "%s: entered function", __func__);
 
+	/* Check length of args and punt on > 2 */
+	if (list_length(args) != 2)
+	{
+		elog(DEBUG2, "%s: got nargs == %d", __func__, list_length(args));
+		PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+	}
 
+	/* Only respond to an inner join/unknown context join */
+	if (jointype != JOIN_INNER)
+	{
+		elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
+		PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+	}
+
+	PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
+}
+
+
+
+
 /**
  * The gserialized_analyze_nd sets this function as a
  * callback on the stats object when called by the ANALYZE
@@ -2208,6 +2220,7 @@
 	));
 }
 
+
 /**
  * This function should return an estimation of the number of
  * rows returned by a query involving an overlap check
@@ -2221,87 +2234,63 @@
  * and invoke the work-horse.
  *
  */
-PG_FUNCTION_INFO_V1(gserialized_gist_sel);
-Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
+
+float8
+gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
 {
-	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
-	/* Oid operator_oid = PG_GETARG_OID(1); */
-	List *args = (List *) PG_GETARG_POINTER(2);
-	/* int varRelid = PG_GETARG_INT32(3); */
-	int mode = PG_GETARG_INT32(4);
-
 	VariableStatData vardata;
+	Node *other = NULL;
+	bool varonleft;
 	ND_STATS *nd_stats = NULL;
 
-	Node *other;
-	Var *self;
 	GBOX search_box;
 	float8 selectivity = 0;
 
-	POSTGIS_DEBUG(2, "gserialized_gist_sel called");
+	elog(DEBUG2, "%s: entered function", __func__);
 
-	/*
-	 * TODO: This is a big one,
-	 * All this statistics code *only* tries to generate a valid
-	 * selectivity for && and &&&. That leaves all the other
-	 * geometry operators with bad stats! The selectivity
-	 * calculation should take account of the incoming operator
-	 * type and do the right thing.
-	 */
-
-	/* Fail if not a binary opclause (probably shouldn't happen) */
-	if (list_length(args) != 2)
+	if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
 	{
-		POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
-		PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
+		elog(DEBUG2, "%s: could not find vardata", __func__);
+		return DEFAULT_ND_SEL;
 	}
 
-	/* Find the constant part */
-	other = (Node *) linitial(args);
-	if ( ! IsA(other, Const) )
+	if (!IsA(other, Const))
 	{
-		self = (Var *)other;
-		other = (Node *) lsecond(args);
+		ReleaseVariableStats(vardata);
+		elog(DEBUG2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
+		return DEFAULT_ND_SEL;
 	}
-	else
-	{
-		self = (Var *) lsecond(args);
-	}
 
-	if ( ! IsA(other, Const) )
+	if (!gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box))
 	{
-		POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
-		PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
+		ReleaseVariableStats(vardata);
+		elog(DEBUG2, "%s: search box is EMPTY", __func__);
+		return 0.0;
 	}
 
-	/* Convert the constant to a BOX */
-	if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
+	if (!vardata.statsTuple)
 	{
-		POSTGIS_DEBUG(3, "search box is EMPTY");
-		PG_RETURN_FLOAT8(0.0);
+		elog(DEBUG1, "%s: no statistics available on table, run ANALYZE", __func__);
+		return DEFAULT_ND_SEL;
 	}
-	POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
 
-	/* Get pg_statistic row */
-	examine_variable(root, (Node*)self, 0, &vardata);
-	if ( vardata.statsTuple ) {
-		nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
-	}
+	nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
 	ReleaseVariableStats(vardata);
-
-	if ( ! nd_stats )
-	{
-		POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
-		PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
-	}
-
-	POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
-
-	/* Do the estimation! */
 	selectivity = estimate_selectivity(&search_box, nd_stats, mode);
-	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
+	pfree(nd_stats);
+	return selectivity;
+}
 
-	pfree(nd_stats);
+PG_FUNCTION_INFO_V1(gserialized_gist_sel);
+Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	// Oid operator_oid = PG_GETARG_OID(1);
+	List *args = (List *) PG_GETARG_POINTER(2);
+	int varRelid = PG_GETARG_INT32(3);
+	int mode = PG_GETARG_INT32(4);
+	float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
+	elog(DEBUG2, "%s: selectivity is %g", __func__, selectivity);
 	PG_RETURN_FLOAT8(selectivity);
 }
 

Modified: trunk/postgis/gserialized_supportfn.c
===================================================================
--- trunk/postgis/gserialized_supportfn.c	2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/gserialized_supportfn.c	2019-05-27 21:28:06 UTC (rev 17446)
@@ -46,10 +46,14 @@
 /* PostGIS */
 #include "liblwgeom.h"
 
-/* Prototypes */
+/* Local prototypes */
 Datum postgis_index_supportfn(PG_FUNCTION_ARGS);
 
+/* From gserialized_estimate.c */
+float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
+float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
 
+
 /*
 * Depending on the function, we will deploy different
 * index enhancement strategies. Containment functions
@@ -198,6 +202,22 @@
 	Node *rawreq = (Node *) PG_GETARG_POINTER(0);
 	Node *ret = NULL;
 
+	if (IsA(rawreq, SupportRequestSelectivity))
+	{
+		SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+
+		if (req->is_join)
+		{
+			req->selectivity = gserialized_joinsel_internal(req->root, req->args, req->jointype, 2);
+		}
+		else
+		{
+			req->selectivity = gserialized_sel_internal(req->root, req->args, req->varRelid, 2);
+		}
+		elog(DEBUG2, "%s: got selectivity %g", __func__, req->selectivity);
+		PG_RETURN_POINTER(req);
+	}
+
 	/*
 	* This support function is strictly for adding spatial index
 	* support.



More information about the postgis-tickets mailing list