[postgis-tickets] r17446 - Fix selectivity issue with support functions, wherein... default
Paul Ramsey
pramsey at cleverelephant.ca
Mon May 27 02:28:06 PDT 2019
Author: pramsey
Date: 2019-05-27 14:28:06 -0700 (Mon, 27 May 2019)
New Revision: 17446
Modified:
trunk/postgis/geography_centroid.c
trunk/postgis/geography_inout.c
trunk/postgis/gserialized_estimate.c
trunk/postgis/gserialized_supportfn.c
Log:
Fix selectivity issue with support functions, wherein... default
selectivities were the only thing we were returning.
Closes #4404
Modified: trunk/postgis/geography_centroid.c
===================================================================
--- trunk/postgis/geography_centroid.c 2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/geography_centroid.c 2019-05-27 21:28:06 UTC (rev 17446)
@@ -60,7 +60,6 @@
int32_t srid;
bool use_spheroid = true;
SPHEROID s;
- uint32_t type;
/* Get our geometry object loaded into memory. */
g = PG_GETARG_GSERIALIZED_P(0);
@@ -91,9 +90,7 @@
if ( ! use_spheroid )
s.a = s.b = s.radius;
- type = gserialized_get_type(g);
-
- switch (type)
+ switch (lwgeom_get_type(lwgeom))
{
case POINTTYPE:
Modified: trunk/postgis/geography_inout.c
===================================================================
--- trunk/postgis/geography_inout.c 2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/geography_inout.c 2019-05-27 21:28:06 UTC (rev 17446)
@@ -536,14 +536,12 @@
PG_FUNCTION_INFO_V1(geography_from_geometry);
Datum geography_from_geometry(PG_FUNCTION_ARGS)
{
+ GSERIALIZED *g_ser = NULL;
GSERIALIZED *geom = PG_GETARG_GSERIALIZED_P_COPY(0);
- LWGEOM *lwgeom = NULL;
- GSERIALIZED *g_ser = NULL;
+ LWGEOM *lwgeom = lwgeom_from_gserialized(geom);
- geography_valid_type(gserialized_get_type(geom));
+ geography_valid_type(lwgeom_get_type(lwgeom));
- lwgeom = lwgeom_from_gserialized(geom);
-
/* Force default SRID */
if ( (int)lwgeom->srid <= 0 )
{
Modified: trunk/postgis/gserialized_estimate.c
===================================================================
--- trunk/postgis/gserialized_estimate.c 2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/gserialized_estimate.c 2019-05-27 21:28:06 UTC (rev 17446)
@@ -143,7 +143,11 @@
static Oid table_get_spatial_index(Oid tbl_oid, text *col, int *key_type);
static GBOX * spatial_index_read_extent(Oid idx_oid, int key_type);
+/* Other prototypes */
+float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
+float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
+
/* Old Prototype */
Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
@@ -1259,49 +1263,25 @@
));
}
-/**
-* Join selectivity of the && operator. The selectivity
-* is the ratio of the number of rows we think will be
-* returned divided the maximum number of rows the join
-* could possibly return (the full combinatoric join).
-*
-* joinsel = estimated_nrows / (totalrows1 * totalrows2)
-*/
-PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
-Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
+double
+gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- /* Oid operator = PG_GETARG_OID(1); */
- List *args = (List *) PG_GETARG_POINTER(2);
- JoinType jointype = (JoinType) PG_GETARG_INT16(3);
- int mode = PG_GETARG_INT32(4);
-
- Node *arg1, *arg2;
- Var *var1, *var2;
+ float8 selectivity;
Oid relid1, relid2;
-
ND_STATS *stats1, *stats2;
- float8 selectivity;
+ Node *arg1 = (Node*) linitial(args);
+ Node *arg2 = (Node*) lsecond(args);
+ Var *var1 = (Var*) arg1;
+ Var *var2 = (Var*) arg2;
- /* Only respond to an inner join/unknown context join */
- if (jointype != JOIN_INNER)
- {
- elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
- PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
- }
+ elog(DEBUG2, "%s: entered function", __func__);
- /* Find Oids of the geometry columns we are working with */
- arg1 = (Node*) linitial(args);
- arg2 = (Node*) lsecond(args);
- var1 = (Var*) arg1;
- var2 = (Var*) arg2;
-
/* We only do column joins right now, no functional joins */
/* TODO: handle g1 && ST_Expand(g2) */
if (!IsA(arg1, Var) || !IsA(arg2, Var))
{
elog(DEBUG1, "%s called with arguments that are not column references", __func__);
- PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+ return DEFAULT_ND_JOINSEL;
}
/* What are the Oids of our tables/relations? */
@@ -1308,36 +1288,68 @@
relid1 = rt_fetch(var1->varno, root->parse->rtable)->relid;
relid2 = rt_fetch(var2->varno, root->parse->rtable)->relid;
- POSTGIS_DEBUGF(3, "using relations \"%s\" Oid(%d), \"%s\" Oid(%d)",
- get_rel_name(relid1) ? get_rel_name(relid1) : "NULL", relid1, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
-
/* Pull the stats from the stats system. */
stats1 = pg_get_nd_stats(relid1, var1->varattno, mode, false);
stats2 = pg_get_nd_stats(relid2, var2->varattno, mode, false);
/* If we can't get stats, we have to stop here! */
- if ( ! stats1 )
+ if (!stats1)
{
- POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid1) ? get_rel_name(relid1) : "NULL" , relid1);
- PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+ elog(DEBUG2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
+ return DEFAULT_ND_JOINSEL;
}
- else if ( ! stats2 )
+ else if (!stats2)
{
- POSTGIS_DEBUGF(3, "unable to retrieve stats for \"%s\" Oid(%d)", get_rel_name(relid2) ? get_rel_name(relid2) : "NULL", relid2);
- PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+ elog(DEBUG2, "%s: cannot find stats for \"%s\"", __func__, get_rel_name(relid2) ? get_rel_name(relid2) : "NULL");
+ return DEFAULT_ND_JOINSEL;
}
selectivity = estimate_join_selectivity(stats1, stats2);
- POSTGIS_DEBUGF(2, "got selectivity %g", selectivity);
-
+ elog(DEBUG2, "got selectivity %g", selectivity);
pfree(stats1);
pfree(stats2);
- PG_RETURN_FLOAT8(selectivity);
+ return selectivity;
}
+/**
+* Join selectivity of the && operator. The selectivity
+* is the ratio of the number of rows we think will be
+* returned divided the maximum number of rows the join
+* could possibly return (the full combinatoric join).
+*
+* joinsel = estimated_nrows / (totalrows1 * totalrows2)
+*/
+PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
+Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ /* Oid operator = PG_GETARG_OID(1); */
+ List *args = (List *) PG_GETARG_POINTER(2);
+ JoinType jointype = (JoinType) PG_GETARG_INT16(3);
+ int mode = PG_GETARG_INT32(4);
+ elog(DEBUG2, "%s: entered function", __func__);
+ /* Check length of args and punt on > 2 */
+ if (list_length(args) != 2)
+ {
+ elog(DEBUG2, "%s: got nargs == %d", __func__, list_length(args));
+ PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+ }
+ /* Only respond to an inner join/unknown context join */
+ if (jointype != JOIN_INNER)
+ {
+ elog(DEBUG1, "%s: jointype %d not supported", __func__, jointype);
+ PG_RETURN_FLOAT8(DEFAULT_ND_JOINSEL);
+ }
+
+ PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
+}
+
+
+
+
/**
* The gserialized_analyze_nd sets this function as a
* callback on the stats object when called by the ANALYZE
@@ -2208,6 +2220,7 @@
));
}
+
/**
* This function should return an estimation of the number of
* rows returned by a query involving an overlap check
@@ -2221,87 +2234,63 @@
* and invoke the work-horse.
*
*/
-PG_FUNCTION_INFO_V1(gserialized_gist_sel);
-Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
+
+float8
+gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
- /* Oid operator_oid = PG_GETARG_OID(1); */
- List *args = (List *) PG_GETARG_POINTER(2);
- /* int varRelid = PG_GETARG_INT32(3); */
- int mode = PG_GETARG_INT32(4);
-
VariableStatData vardata;
+ Node *other = NULL;
+ bool varonleft;
ND_STATS *nd_stats = NULL;
- Node *other;
- Var *self;
GBOX search_box;
float8 selectivity = 0;
- POSTGIS_DEBUG(2, "gserialized_gist_sel called");
+ elog(DEBUG2, "%s: entered function", __func__);
- /*
- * TODO: This is a big one,
- * All this statistics code *only* tries to generate a valid
- * selectivity for && and &&&. That leaves all the other
- * geometry operators with bad stats! The selectivity
- * calculation should take account of the incoming operator
- * type and do the right thing.
- */
-
- /* Fail if not a binary opclause (probably shouldn't happen) */
- if (list_length(args) != 2)
+ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
{
- POSTGIS_DEBUG(3, "gserialized_gist_sel: not a binary opclause");
- PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
+ elog(DEBUG2, "%s: could not find vardata", __func__);
+ return DEFAULT_ND_SEL;
}
- /* Find the constant part */
- other = (Node *) linitial(args);
- if ( ! IsA(other, Const) )
+ if (!IsA(other, Const))
{
- self = (Var *)other;
- other = (Node *) lsecond(args);
+ ReleaseVariableStats(vardata);
+ elog(DEBUG2, "%s: no constant argument, returning default selectivity %g", __func__, DEFAULT_ND_SEL);
+ return DEFAULT_ND_SEL;
}
- else
- {
- self = (Var *) lsecond(args);
- }
- if ( ! IsA(other, Const) )
+ if (!gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box))
{
- POSTGIS_DEBUG(3, " no constant arguments - returning a default selectivity");
- PG_RETURN_FLOAT8(DEFAULT_ND_SEL);
+ ReleaseVariableStats(vardata);
+ elog(DEBUG2, "%s: search box is EMPTY", __func__);
+ return 0.0;
}
- /* Convert the constant to a BOX */
- if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
+ if (!vardata.statsTuple)
{
- POSTGIS_DEBUG(3, "search box is EMPTY");
- PG_RETURN_FLOAT8(0.0);
+ elog(DEBUG1, "%s: no statistics available on table, run ANALYZE", __func__);
+ return DEFAULT_ND_SEL;
}
- POSTGIS_DEBUGF(4, " requested search box is: %s", gbox_to_string(&search_box));
- /* Get pg_statistic row */
- examine_variable(root, (Node*)self, 0, &vardata);
- if ( vardata.statsTuple ) {
- nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
- }
+ nd_stats = pg_nd_stats_from_tuple(vardata.statsTuple, mode);
ReleaseVariableStats(vardata);
-
- if ( ! nd_stats )
- {
- POSTGIS_DEBUG(3, " unable to load stats from syscache, not analyzed yet?");
- PG_RETURN_FLOAT8(FALLBACK_ND_SEL);
- }
-
- POSTGIS_DEBUGF(4, " got stats:\n%s", nd_stats_to_json(nd_stats));
-
- /* Do the estimation! */
selectivity = estimate_selectivity(&search_box, nd_stats, mode);
- POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
+ pfree(nd_stats);
+ return selectivity;
+}
- pfree(nd_stats);
+PG_FUNCTION_INFO_V1(gserialized_gist_sel);
+Datum gserialized_gist_sel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ // Oid operator_oid = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ int mode = PG_GETARG_INT32(4);
+ float8 selectivity = gserialized_sel_internal(root, args, varRelid, mode);
+ elog(DEBUG2, "%s: selectivity is %g", __func__, selectivity);
PG_RETURN_FLOAT8(selectivity);
}
Modified: trunk/postgis/gserialized_supportfn.c
===================================================================
--- trunk/postgis/gserialized_supportfn.c 2019-05-26 20:51:28 UTC (rev 17445)
+++ trunk/postgis/gserialized_supportfn.c 2019-05-27 21:28:06 UTC (rev 17446)
@@ -46,10 +46,14 @@
/* PostGIS */
#include "liblwgeom.h"
-/* Prototypes */
+/* Local prototypes */
Datum postgis_index_supportfn(PG_FUNCTION_ARGS);
+/* From gserialized_estimate.c */
+float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
+float8 gserialized_sel_internal(PlannerInfo *root, List *args, int varRelid, int mode);
+
/*
* Depending on the function, we will deploy different
* index enhancement strategies. Containment functions
@@ -198,6 +202,22 @@
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
Node *ret = NULL;
+ if (IsA(rawreq, SupportRequestSelectivity))
+ {
+ SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+
+ if (req->is_join)
+ {
+ req->selectivity = gserialized_joinsel_internal(req->root, req->args, req->jointype, 2);
+ }
+ else
+ {
+ req->selectivity = gserialized_sel_internal(req->root, req->args, req->varRelid, 2);
+ }
+ elog(DEBUG2, "%s: got selectivity %g", __func__, req->selectivity);
+ PG_RETURN_POINTER(req);
+ }
+
/*
* This support function is strictly for adding spatial index
* support.
More information about the postgis-tickets
mailing list