[postgis-tickets] [SCM] PostGIS branch master updated. 3.1.0alpha3-15-ged1de58
git at osgeo.org
git at osgeo.org
Sat Nov 28 19:01:20 PST 2020
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".
The branch, master has been updated
via ed1de589b145cf1b02f68c2a04b6d211f0c62889 (commit)
from d159dcf8d70b5c0b733c248bef44a66b6da51333 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit ed1de589b145cf1b02f68c2a04b6d211f0c62889
Author: Darafei Praliaskouski <me at komzpa.net>
Date: Sun Nov 29 06:01:00 2020 +0300
Generate Correlation statistics so BRIN works.
Closes #4625
diff --git a/NEWS b/NEWS
index ee001fc..407815b 100644
--- a/NEWS
+++ b/NEWS
@@ -15,7 +15,9 @@ Only tickets not included in 3.1.0alpha3
- #4805, _ST_SortableHash exposed to work around parallel sorting performance issue
in Postgres. If your table is huge, use ORDER BY _ST_SortableHash(geom)
instead of ORDER BY geom to make parallel sort faster (Darafei Praliaskouski)
-
+ - #4625, Correlation statistics now calculated.
+ Run ANALYZE for BRIN indexes to start kicking in.
+ (Darafei Praliaskouski)
PostGIS 3.1.0alpha3
2020/11/19
diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c
index b5202d7..71a29a9 100644
--- a/postgis/gserialized_estimate.c
+++ b/postgis/gserialized_estimate.c
@@ -168,8 +168,17 @@ Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
*/
#define STATISTIC_KIND_ND 102
#define STATISTIC_KIND_2D 103
-#define STATISTIC_SLOT_ND 0
-#define STATISTIC_SLOT_2D 1
+
+/*
+ * Postgres does not pin its slots and uses them as they come.
+ * We need to preserve its Correlation for brin to work
+ * 0 may be MCV
+ * 1 may be Histogram
+ * 2 may be Correlation
+ * We take 3 and 4.
+ */
+#define STATISTIC_SLOT_ND 3
+#define STATISTIC_SLOT_2D 4
/*
* The SD factor restricts the side of the statistics histogram
@@ -274,8 +283,11 @@ typedef struct ND_STATS_T
float4 value[1];
} ND_STATS;
-
-
+typedef struct {
+ /* Saved state from std_typanalyze() */
+ AnalyzeAttrComputeStatsFunc std_compute_stats;
+ void *std_extra_data;
+} GserializedAnalyzeExtraData;
/**
* Given that geodetic boxes are X/Y/Z regardless of the
@@ -1345,9 +1357,6 @@ Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(gserialized_joinsel_internal(root, args, jointype, mode));
}
-
-
-
/**
* The gserialized_analyze_nd sets this function as a
* callback on the stats object when called by the ANALYZE
@@ -1363,7 +1372,7 @@ Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
* for use by operator estimators.
*
* We will populate an n-d histogram using the provided
- * sample rows. The selectivity estimators (sel and j_oinsel)
+ * sample rows. The selectivity estimators (sel and joinsel)
* can then use the histogram
*/
static void
@@ -1864,6 +1873,12 @@ static void
compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
int sample_rows, double total_rows)
{
+ GserializedAnalyzeExtraData *extra_data = (GserializedAnalyzeExtraData *)stats->extra_data;
+ /* Call standard statistics calculation routine to fill in correlation for BRIN to work */
+ stats->extra_data = extra_data->std_extra_data;
+ extra_data->std_compute_stats(stats, fetchfunc, sample_rows, total_rows);
+ stats->extra_data = extra_data;
+
/* 2D Mode */
compute_gserialized_stats_mode(stats, fetchfunc, sample_rows, total_rows, 2);
@@ -1900,30 +1915,24 @@ compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
*
* Being this experimental we'll stick to a static stat_builder/sample_rows
* value for now.
-*
*/
PG_FUNCTION_INFO_V1(gserialized_analyze_nd);
Datum gserialized_analyze_nd(PG_FUNCTION_ARGS)
{
VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
- Form_pg_attribute attr = stats->attr;
+ GserializedAnalyzeExtraData *extra_data =
+ (GserializedAnalyzeExtraData *)palloc(sizeof(GserializedAnalyzeExtraData));
- POSTGIS_DEBUG(2, "gserialized_analyze_nd called");
+ /* Ask for standard analyze to fill in as much as possible */
+ if (!std_typanalyze(stats))
+ PG_RETURN_BOOL(false);
- /* If the attstattarget column is negative, use the default value */
- /* NB: it is okay to scribble on stats->attr since it's a copy */
- if (attr->attstattarget < 0)
- attr->attstattarget = default_statistics_target;
-
- POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
-
- /* Setup the minimum rows and the algorithm function.
- * 300 matches the default value set in
- * postgresql/src/backend/commands/analyze.c */
- stats->minrows = 300 * stats->attr->attstattarget;
+ /* Save old compute_stats and extra_data for scalar statistics ... */
+ extra_data->std_compute_stats = stats->compute_stats;
+ extra_data->std_extra_data = stats->extra_data;
+ /* ... and replace with our info */
stats->compute_stats = compute_gserialized_stats;
-
- POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
+ stats->extra_data = extra_data;
/* Indicate we are done successfully */
PG_RETURN_BOOL(true);
@@ -2462,7 +2471,7 @@ table_get_spatial_index(Oid tbl_oid, text *col, int *key_type)
/* Save result, clean up, and break out */
result = idx_oid;
if (key_type)
- *key_type = (atttypid == b2d_oid ? STATISTIC_SLOT_2D : STATISTIC_SLOT_ND);
+ *key_type = (atttypid == b2d_oid ? STATISTIC_KIND_2D : STATISTIC_KIND_ND);
break;
}
}
@@ -2507,7 +2516,7 @@ spatial_index_read_extent(Oid idx_oid, int key_type)
Datum idx_attr = index_getattr(ituple, 1, idx_rel->rd_att, &isnull);
if (!isnull)
{
- if (key_type == STATISTIC_SLOT_2D)
+ if (key_type == STATISTIC_KIND_2D)
{
BOX2DF *b = (BOX2DF*)DatumGetPointer(idx_attr);
if (bounds_2df)
@@ -2531,14 +2540,14 @@ spatial_index_read_extent(Oid idx_oid, int key_type)
ReleaseBuffer(buffer);
index_close(idx_rel, AccessShareLock);
- if (key_type == STATISTIC_SLOT_2D && bounds_2df)
+ if (key_type == STATISTIC_KIND_2D && bounds_2df)
{
if (box2df_is_empty(bounds_2df))
return NULL;
gbox = gbox_new(0);
box2df_to_gbox_p(bounds_2df, gbox);
}
- else if (key_type == STATISTIC_SLOT_ND && bounds_gidx)
+ else if (key_type == STATISTIC_KIND_ND && bounds_gidx)
{
if (gidx_is_unknown(bounds_gidx))
return NULL;
diff --git a/raster/rt_pg/rtpostgis_drop.sql.in b/raster/rt_pg/rtpostgis_drop.sql.in
index d63c365..259692c 100644
--- a/raster/rt_pg/rtpostgis_drop.sql.in
+++ b/raster/rt_pg/rtpostgis_drop.sql.in
@@ -111,7 +111,7 @@ DROP FUNCTION IF EXISTS st_quantile(rastertable text, rastercolumn text, nband i
DROP FUNCTION IF EXISTS st_quantile(rastertable text, rastercolumn text, nband int, quantile double precision);
DROP FUNCTION IF EXISTS st_quantile(rastertable text, rastercolumn text, exclude_nodata_value boolean, quantile double precision);
DROP FUNCTION IF EXISTS st_quantile(rastertable text, rastercolumn text, quantile double precision);
-DROP FUNCTION IF EXISTS st_approxquantile(rastertable text, rastercolumn text, nband int, exclude_nodata_value boolean, sample_percent double precision DEFAULT 0.1, quantiles double precision[], OUT quantile double precision, OUT value double precision);
+DROP FUNCTION IF EXISTS st_approxquantile(rastertable text, rastercolumn text, nband int, exclude_nodata_value boolean, sample_percent double precision, quantiles double precision[], OUT quantile double precision, OUT value double precision);
DROP FUNCTION IF EXISTS st_approxquantile( rastertable text, rastercolumn text, nband int, sample_percent double precision, quantiles double precision[], OUT quantile double precision, OUT value double precision);
DROP FUNCTION IF EXISTS st_approxquantile( rastertable text, rastercolumn text, sample_percent double precision, quantiles double precision[], OUT quantile double precision, OUT value double precision);
DROP FUNCTION IF EXISTS st_approxquantile( rastertable text, rastercolumn text, quantiles double precision[], OUT quantile double precision, OUT value double precision);
diff --git a/regress/core/regress_selectivity_expected b/regress/core/regress_selectivity_expected
index a399aed..5a12f04 100644
--- a/regress/core/regress_selectivity_expected
+++ b/regress/core/regress_selectivity_expected
@@ -2,7 +2,7 @@ ERROR: stats for "no_stats.g" do not exist
ERROR: stats for "no_stats.g" do not exist
ERROR: stats for "no_stats.g" do not exist
ERROR: stats for "no_stats_join.g" do not exist
-public|regular_overdots|g|f|0|32|-1|||||||
+public|regular_overdots|g|f|0|32|-1|{0101000000000000000000F03F000000000000F03F:010100000000000000000000400000000000000040:010100000000000000000008400000000000000840:0101000000000000000000F03F0000000000000840:01010000000000000000000840000000000000F03F:0101000000000000000000F03F0000000000000040:01010000000000000000000040000000000000F03F:010100000000000000000010400000000000001040:0101000000000000000000F03F0000000000001440:010100000000000000000000400000000000001040:010100000000000000000010400000000000000040:01010000000000000000001440000000000000F03F:0101000000000000000000F03F0000000000001040:01010000000000000000001040000000000000F03F:010100000000000000000008400000000000000040:010100000000000000000000400000000000000840:010100000000000000000014400000000000001440:010100000000000000000000400000000000001840:010100000000000000000008400000000000001440:010100000000000000000014400000000000000840:010100000000000000000018400000000000000040:0101000000000000000000F03F0000000000001C40:01010000000000



2440:010100000000000000000008400000000000002440:010100000000000000000024400000000000001C40:010100000000000000000024400000000000002040:010100000000000000000020400000000000002440:01010000000000000000001C400000000000002440}|{0.0470146,0.0380818,0.0300893,0.029149,0.029149,0.0249177,0.0249177,0.0230371,0.022567,0.022567,0.022567,0.022567,0.0211566,0.0211566,0.0188058,0.0188058,0.0169252,0.0164551,0.0164551,0.0164551,0.0164551,0.015985,0.015985,0.015985,0.015985,0.0145745,0.0145745,0.0136342,0.0136342,0.0122238,0.0122238,0.0117536,0.0117536,0.0117536,0.0117536,0.0117536,0.0108134,0.0108134,0.0103432,0.0103432,0.0103432,0.0103432,0.00846262,0.00846262,0.00799248,0.00799248,0.00752233,0.00752233,0.00752233,0.00752233,0.00752233,0.00752233,0.00752233,0.00705219,0.00705219,0.00611189,0.00611189,0.00611189,0.00611189,0.0051716,0.0051716,0.0051716,0.0051716,0.00470146,0.00470146,0.00423131,0.00423131,0.00423131,0.00376117,0.00376117,0.00376117,0.00376117,0.00329102,0.00329102,0.00329102,0.0032
9102,0.00282087,0.00282087,0.00282087,0.00282087,0.00235073,0.00235073,0.00188058,0.00188058,0.00188058,0.00188058,0.00188058,0.00141044,0.00141044,0.00141044,0.00141044,0.00141044,0.00141044,0.000940292,0.000940292,0.000940292,0.000940292}|{010100000000000000000024400000000000002240:010100000000000000000024400000000000002440:010100000000000000000022400000000000002440}|0.321087|||
selectivity_00|2127
selectivity_01|1068
selectivity_02|actual|0.502
-----------------------------------------------------------------------
Summary of changes:
NEWS | 4 +-
postgis/gserialized_estimate.c | 65 ++++++++++++++++++-------------
raster/rt_pg/rtpostgis_drop.sql.in | 2 +-
regress/core/regress_selectivity_expected | 2 +-
4 files changed, 42 insertions(+), 31 deletions(-)
hooks/post-receive
--
PostGIS
More information about the postgis-tickets mailing list