[SCM] PostGIS branch master updated. 3.6.0rc2-162-gbe84ae80b
git at osgeo.org
git at osgeo.org
Wed Oct 29 15:56:28 PDT 2025
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".
The branch, master has been updated
via be84ae80b1539e6b05a28911d765d83058ba03fe (commit)
from a14926dae52db384d86dd3ff84ee9317f05837f9 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit be84ae80b1539e6b05a28911d765d83058ba03fe
Author: Darafei Praliaskouski <me at komzpa.net>
Date: Thu Oct 30 02:55:59 2025 +0400
Prevent histogram target overflow when analysing massive tables
Add CUnit tests for overflow scenarios
Closes #5959
diff --git a/NEWS b/NEWS
index 4cde8f448..29083c77d 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,10 @@ xxxx/xx/xx
- #5702, Allow the compiler to detect the parallelism -flto=auto (Darafei Praliaskouski)
- #4798, ST_AsGeoJSON warns about duplicate property keys (Darafei Praliaskouski)
+* Bug Fixes *
+
+ - #5959, Prevent histogram target overflow when analysing massive tables (Darafei Praliaskouski)
+
PostGIS 3.6.0
2025/09/01
diff --git a/configure.ac b/configure.ac
index d89e82c55..efd360053 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1938,6 +1938,7 @@ AC_CONFIG_FILES([GNUmakefile
libpgcommon/Makefile
libpgcommon/cunit/Makefile
postgis/Makefile
+ postgis/cunit/Makefile
postgis/sqldefines.h
sfcgal/Makefile
$SFCGAL_MAKEFILE_LIST
diff --git a/postgis/cunit/Makefile.in b/postgis/cunit/Makefile.in
new file mode 100644
index 000000000..483e4ca10
--- /dev/null
+++ b/postgis/cunit/Makefile.in
@@ -0,0 +1,43 @@
+# **********************************************************************
+# *
+# * PostGIS - Spatial Types for PostgreSQL
+# * http://postgis.net
+# *
+# * Copyright 2025 Darafei Praliaskouski <me at komzpa.net>
+# *
+# * This is free software; you can redistribute and/or modify it under
+# * the terms of the GNU General Public Licence. See the COPYING file.
+# *
+# **********************************************************************
+
+srcdir = @srcdir@
+top_builddir = @top_builddir@
+
+CC=@CC@
+LIBTOOL=@LIBTOOL@
+CFLAGS = @CFLAGS@ @CPPFLAGS@ @PGSQL_BE_CPPFLAGS@ @CUNIT_CPPFLAGS@ -I.. -I$(top_builddir) -I@top_srcdir@/liblwgeom -I@top_builddir@/liblwgeom -I@top_srcdir@/libpgcommon -I@top_builddir@/libpgcommon
+LDFLAGS = @CUNIT_LDFLAGS@ -lm
+
+VPATH = $(srcdir)
+
+OBJS = cu_tester.o
+
+# Build the standalone histogram helper tester.
+all: cu_tester
+
+# Execute the suite directly; no installation step is required.
+check: all
+ $(LIBTOOL) --mode=execute ./cu_tester
+
+# Link the tester with libtool; all helper code is header-only.
+cu_tester: $(OBJS)
+ $(LIBTOOL) --mode=link $(CC) $(CFLAGS) -o $@ $(OBJS) $(LDFLAGS)
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+clean:
+ rm -f $(OBJS) cu_tester
+
+clobber distclean: clean
+ rm -f Makefile
diff --git a/postgis/cunit/cu_tester.c b/postgis/cunit/cu_tester.c
new file mode 100644
index 000000000..b4dd46aa7
--- /dev/null
+++ b/postgis/cunit/cu_tester.c
@@ -0,0 +1,154 @@
+/**********************************************************************
+ *
+ * PostGIS - Spatial Types for PostgreSQL
+ * http://postgis.net
+ *
+ * This file is part of PostGIS
+ *
+ * PostGIS is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * PostGIS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ **********************************************************************
+ *
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
+ *
+ **********************************************************************/
+
+#include "postgres.h"
+
+#include <CUnit/Basic.h>
+#include <limits.h>
+#include <string.h>
+
+#include "../gserialized_estimate_support.h"
+
+/*
+ * Test helper: return an ND_BOX built from explicit per-axis bounds.
+ * The first four arguments fill min[0..3] and the last four fill max[0..3],
+ * in x, y, z, m order.
+ */
+static ND_BOX
+make_box(float minx, float miny, float minz, float minm, float maxx, float maxy, float maxz, float maxm)
+{
+ ND_BOX box;
+
+ /* Start from an all-zero struct before assigning the eight bounds. */
+ memset(&box, 0, sizeof(box));
+ box.min[0] = minx;
+ box.min[1] = miny;
+ box.min[2] = minz;
+ box.min[3] = minm;
+ box.max[0] = maxx;
+ box.max[1] = maxy;
+ box.max[2] = maxz;
+ box.max[3] = maxm;
+ return box;
+}
+
+/*
+ * CUnit test for histogram_cell_budget(total_rows, ndims, attstattarget).
+ * Each assertion pins one branch of the clamping logic.
+ */
+static void
+histogram_budget_clamps(void)
+{
+ /* Zero or negative row counts disable histogram construction. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(0.0, 2, 100), 0);
+ CU_ASSERT_EQUAL(histogram_cell_budget(-1.0, 4, 100), 0);
+
+ /* Degenerate dimensionality cannot allocate histogram space. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1000.0, 0, 100), 0);
+
+ /* Matches the classic pow(attstattarget, ndims) path. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 2, 100), 10000);
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 3, 50), 125000);
+
+ /* attstattarget^ndims exceeds ndims * 100000 and must be clamped. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 4, 50), 400000);
+
+ /* attstattarget<=0 is normalised to the smallest viable target. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 2, 0), 1);
+
+ /* Row clamp shrinks the grid for small relations: 10 * 2 * 1 = 20. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1.0, 2, 100), 20);
+
+ /*
+ * Large tables now preserve the dimensional cap instead of overflowing:
+ * 10 * 2 * 1.5e8 is above INT_MAX, so the row cap is limited to INT_MAX
+ * and the 100^2 request survives.
+ */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1.5e8, 2, 100), 10000);
+
+ /* Regression for #5984: huge attstat targets stabilise instead of wrapping. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(5e6, 2, 10000), 200000);
+
+ /* Trigger the INT_MAX guard once both other caps exceed it. */
+ CU_ASSERT_EQUAL(histogram_cell_budget((double)INT_MAX, 50000, INT_MAX), INT_MAX);
+}
+
+/*
+ * CUnit test for nd_stats_value_index(): checks the flattened offset of an
+ * in-range coordinate and the -1 result for out-of-range requests.
+ */
+static void
+nd_stats_indexing_behaviour(void)
+{
+ ND_STATS stats;
+ const int good_index[ND_DIMS] = {1, 2, 0, 0};
+ /* Second entry (5) is not below size[1] = 5, so it is out of range. */
+ const int bad_index[ND_DIMS] = {1, 5, 0, 0};
+
+ /* Zeroing leaves size[3] at 0; the ndims = 4 case below relies on that. */
+ memset(&stats, 0, sizeof(stats));
+ stats.ndims = 3;
+ stats.size[0] = 4.0f;
+ stats.size[1] = 5.0f;
+ stats.size[2] = 3.0f;
+
+ /* Three-dimensional index (x=1, y=2, z=0) collapses into 1 + 2 * 4. */
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, good_index), 1 + 2 * 4);
+ /* Any request outside the histogram bounds triggers a guard. */
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, bad_index), -1);
+
+ /* Regression for #5959: ndims higher than populated sizes still honours guards. */
+ stats.ndims = 4;
+ /* With size[3] == 0 even index 0 on the fourth axis fails the bounds check. */
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, good_index), -1);
+}
+
+/*
+ * CUnit test for nd_box_ratio(cover, target, ndims) in three dimensions.
+ * make_box() takes the four minima first, then the four maxima; the m slot
+ * is left at zero and is not inspected because ndims is 3.
+ */
+static void
+nd_box_ratio_cases(void)
+{
+ ND_BOX covering = make_box(0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 2.0f, 2.0f, 0.0f);
+ ND_BOX interior = make_box(0.5f, 0.5f, 0.5f, 0.0f, 1.5f, 1.5f, 1.5f, 0.0f);
+ ND_BOX partial = make_box(0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.5f, 0.5f, 0.0f);
+ ND_BOX target = make_box(0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f);
+ ND_BOX flat = make_box(0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
+ ND_BOX touch = make_box(2.0f, 0.0f, 0.0f, 0.0f, 3.0f, 1.0f, 1.0f, 0.0f);
+
+ /* Full coverage should evaluate to one regardless of the extra extent. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &interior, 3), 1.0, 1e-12);
+ /* A shared octant carries one eighth of the reference volume. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&partial, &target, 3), 0.125, 1e-12);
+ /* Degenerate slabs have zero volume in three dimensions. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &flat, 3), 0.0, 1e-12);
+ /* Boxes that only touch along a face should not count as overlap. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &touch, 3), 0.0, 1e-12);
+}
+
+/*
+ * Register the histogram helper suite, run it in verbose mode and map the
+ * outcome onto the process exit status.
+ *
+ * Returns 0 (CUE_SUCCESS) only when the registry was initialised, the suite
+ * and all three tests were registered, the run completed without a framework
+ * error and no test failed.  A framework error is reported as its CUnit
+ * error code (or 1 if CUnit reports none); failed tests yield 1.
+ */
+int
+main(void)
+{
+	CU_pSuite suite;
+	int rc = 1;
+
+	if (CU_initialize_registry() != CUE_SUCCESS)
+		return CU_get_error();
+
+	suite = CU_add_suite("gserialized_histogram_helpers", NULL, NULL);
+	if (!suite)
+		goto framework_error;
+
+	if (!CU_add_test(suite, "histogram budget clamps", histogram_budget_clamps) ||
+	    !CU_add_test(suite, "nd_stats value index guards", nd_stats_indexing_behaviour) ||
+	    !CU_add_test(suite, "nd_box ratio edge cases", nd_box_ratio_cases))
+	{
+		goto framework_error;
+	}
+
+	CU_basic_set_mode(CU_BRM_VERBOSE);
+	if (CU_basic_run_tests() != CUE_SUCCESS)
+		goto framework_error;
+
+	rc = (CU_get_number_of_tests_failed() == 0) ? CUE_SUCCESS : 1;
+	goto cleanup;
+
+framework_error:
+	/*
+	 * Read the error before CU_cleanup_registry() runs, and never let a
+	 * registration or run failure be reported as success: an empty run
+	 * has zero failed tests but has verified nothing.
+	 */
+	rc = CU_get_error();
+	if (rc == CUE_SUCCESS)
+		rc = 1;
+
+cleanup:
+	CU_cleanup_registry();
+	return rc;
+}
diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c
index bcbbcf676..5c4b4387b 100644
--- a/postgis/gserialized_estimate.c
+++ b/postgis/gserialized_estimate.c
@@ -19,11 +19,10 @@
**********************************************************************
*
* Copyright 2012 (C) Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
*
**********************************************************************/
-
-
/**********************************************************************
THEORY OF OPERATION
@@ -112,10 +111,12 @@ dimensionality cases. (2D geometry) &&& (3D column), etc.
#include "stringbuffer.h"
#include "liblwgeom.h"
#include "lwgeodetic.h"
-#include "lwgeom_pg.h" /* For debugging macros. */
+#include "lwgeom_pg.h" /* For debugging macros. */
#include "gserialized_gist.h" /* For index common functions */
+#include "gserialized_estimate_support.h"
#include <math.h>
+#include <limits.h>
#if HAVE_IEEEFP_H
#include <ieeefp.h>
#endif
@@ -144,8 +145,7 @@ Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
/* Local prototypes */
static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum);
-static GBOX * spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
-
+static GBOX *spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
/* Other prototypes */
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
@@ -186,13 +186,6 @@ Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
*/
#define SDFACTOR 3.25
-/**
-* The maximum number of dimensions our code can handle.
-* We'll use this to statically allocate a bunch of
-* arrays below.
-*/
-#define ND_DIMS 4
-
/**
* Minimum width of a dimension that we'll bother trying to
* compute statistics on. Bearing in mind we have no control
@@ -219,68 +212,6 @@ Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
#define FALLBACK_ND_SEL 0.2
#define FALLBACK_ND_JOINSEL 0.3
-/**
-* N-dimensional box type for calculations, to avoid doing
-* explicit axis conversions from GBOX in all calculations
-* at every step.
-*/
-typedef struct ND_BOX_T
-{
- float4 min[ND_DIMS];
- float4 max[ND_DIMS];
-} ND_BOX;
-
-/**
-* N-dimensional box index type
-*/
-typedef struct ND_IBOX_T
-{
- int min[ND_DIMS];
- int max[ND_DIMS];
-} ND_IBOX;
-
-
-/**
-* N-dimensional statistics structure. Well, actually
-* four-dimensional, but set up to handle arbitrary dimensions
-* if necessary (really, we just want to get the 2,3,4-d cases
-* into one shared piece of code).
-*/
-typedef struct ND_STATS_T
-{
- /* Dimensionality of the histogram. */
- float4 ndims;
-
- /* Size of n-d histogram in each dimension. */
- float4 size[ND_DIMS];
-
- /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
- ND_BOX extent;
-
- /* How many rows in the table itself? */
- float4 table_features;
-
- /* How many rows were in the sample that built this histogram? */
- float4 sample_features;
-
- /* How many not-Null/Empty features were in the sample? */
- float4 not_null_features;
-
- /* How many features actually got sampled in the histogram? */
- float4 histogram_features;
-
- /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
- float4 histogram_cells;
-
- /* How many cells did those histogram features cover? */
- /* Since we are pro-rating coverage, this number should */
- /* now always equal histogram_features */
- float4 cells_covered;
-
- /* Variable length # of floats for histogram */
- float4 value[1];
-} ND_STATS;
-
typedef struct {
/* Saved state from std_typanalyze() */
AnalyzeAttrComputeStatsFunc std_compute_stats;
@@ -318,13 +249,12 @@ text_p_get_mode(const text *txt)
char *modestr;
if (VARSIZE_ANY_EXHDR(txt) <= 0)
return mode;
- modestr = (char*)VARDATA(txt);
- if ( modestr[0] == 'N' )
+ modestr = (char *)VARDATA(txt);
+ if (modestr[0] == 'N')
mode = 0;
return mode;
}
-
/**
* Integer comparison function for qsort
*/
@@ -372,7 +302,7 @@ total_double(const double *vals, int nvals)
int i;
float total = 0;
/* Calculate total */
- for ( i = 0; i < nvals; i++ )
+ for (i = 0; i < nvals; i++)
total += vals[i];
return total;
@@ -425,33 +355,6 @@ stddev(const int *vals, int nvals)
}
#endif /* POSTGIS_DEBUG_LEVEL >= 3 */
-/**
-* Given a position in the n-d histogram (i,j,k) return the
-* position in the 1-d values array.
-*/
-static int
-nd_stats_value_index(const ND_STATS *stats, int *indexes)
-{
- int d;
- int accum = 1, vdx = 0;
-
- /* Calculate the index into the 1-d values array that the (i,j,k,l) */
- /* n-d histogram coordinate implies. */
- /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
- for ( d = 0; d < (int)(stats->ndims); d++ )
- {
- int size = (int)(stats->size[d]);
- if ( indexes[d] < 0 || indexes[d] >= size )
- {
- POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
- return -1;
- }
- vdx += indexes[d] * accum;
- accum *= size;
- }
- return vdx;
-}
-
/**
* Convert an #ND_BOX to a JSON string for printing
*/
@@ -722,50 +625,6 @@ nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
return true;
}
-/**
-* Returns the proportion of b2 that is covered by b1.
-*/
-static inline double
-nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
-{
- int d;
- bool covered = true;
- double ivol = 1.0;
- double vol2 = 1.0;
-
- for ( d = 0 ; d < ndims; d++ )
- {
- if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
- return 0.0; /* Disjoint */
-
- if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
- covered = false;
- }
-
- if ( covered )
- return 1.0;
-
- for ( d = 0; d < ndims; d++ )
- {
- double width2 = b2->max[d] - b2->min[d];
- double imin, imax, iwidth;
-
- vol2 *= width2;
-
- imin = Max(b1->min[d], b2->min[d]);
- imax = Min(b1->max[d], b2->max[d]);
- iwidth = imax - imin;
- iwidth = Max(0.0, iwidth);
-
- ivol *= iwidth;
- }
-
- if ( vol2 == 0.0 )
- return vol2;
-
- return ivol / vol2;
-}
-
/* How many bins shall we use in figuring out the distribution? */
#define MAX_NUM_BINS 50
#define BIN_MIN_SIZE 10
@@ -894,9 +753,9 @@ nd_increment(ND_IBOX *ibox, int ndims, int *counter)
{
int d = 0;
- while ( d < ndims )
+ while (d < ndims)
{
- if ( counter[d] < ibox->max[d] )
+ if (counter[d] < ibox->max[d])
{
counter[d] += 1;
break;
@@ -905,7 +764,7 @@ nd_increment(ND_IBOX *ibox, int ndims, int *counter)
d++;
}
/* That's it, cannot increment any more! */
- if ( d == ndims )
+ if (d == ndims)
return false;
/* Increment complete! */
@@ -1321,9 +1180,9 @@ gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, i
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0);
/* Oid operator = PG_GETARG_OID(1); */
- List *args = (List *) PG_GETARG_POINTER(2);
+ List *args = (List *)PG_GETARG_POINTER(2);
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
int mode = PG_GETARG_INT32(4);
@@ -1512,22 +1371,13 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
#endif
}
- /*
- * We'll build a histogram having stats->attr->attstattarget
- * (default 100) cells on each side, within reason...
- * we'll use ndims*100000 as the maximum number of cells.
- * Also, if we're sampling a relatively small table, we'll try to ensure that
- * we have a smaller grid.
- */
#if POSTGIS_PGSQL_VERSION >= 170
- histo_cells_target = (int)pow((double)(stats->attstattarget), (double)ndims);
POSTGIS_DEBUGF(3, " stats->attstattarget: %d", stats->attstattarget);
+ histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attstattarget);
#else
- histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
+ histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attr->attstattarget);
#endif
- histo_cells_target = Min(histo_cells_target, ndims * 100000);
- histo_cells_target = Min(histo_cells_target, (int)(10 * ndims * total_rows));
POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
/* If there's no useful features, we can't work out stats */
@@ -1836,8 +1686,6 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
return;
}
-
-
/**
* In order to do useful selectivity calculations in both 2-D and N-D
* modes, we actually have to generate two stats objects, one for 2-D
@@ -1875,7 +1723,6 @@ compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
}
}
-
/**
* This function will be called when the ANALYZE command is run
* on a column of the "geometry" or "geography" type.
diff --git a/postgis/gserialized_estimate_support.h b/postgis/gserialized_estimate_support.h
new file mode 100644
index 000000000..0d3a23d75
--- /dev/null
+++ b/postgis/gserialized_estimate_support.h
@@ -0,0 +1,197 @@
+/**********************************************************************
+ *
+ * PostGIS - Spatial Types for PostgreSQL
+ * http://postgis.net
+ *
+ * This file is part of PostGIS
+ *
+ * PostGIS is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * PostGIS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ **********************************************************************
+ *
+ * Internal helpers shared between the gserialized selectivity
+ * implementation and the unit tests.
+ *
+ * Keeping the routines header-only ensures the planner code and the
+ * harness evaluate the exact same floating-point flows without the
+ * cross-object plumbing that previously complicated maintenance.
+ * Nothing here is installed; the header is meant for
+ * gserialized_estimate.c and for the dedicated CUnit suite only.
+ *
+ **********************************************************************
+ *
+ * Copyright 2012 (C) Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
+ *
+ **********************************************************************/
+
+#ifndef POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
+#define POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+/* The maximum number of dimensions our statistics code supports. */
+#define ND_DIMS 4
+
+/*
+ * Lightweight n-dimensional box representation for selectivity math.
+ * Used to avoid doing explicit axis conversions from GBOX in all
+ * calculations at every step. Each array always has ND_DIMS slots.
+ */
+typedef struct ND_BOX_T {
+ float4 min[ND_DIMS];
+ float4 max[ND_DIMS];
+} ND_BOX;
+
+/*
+ * Integer counterpart used for histogram cell iteration: an n-dimensional
+ * box index type with one min/max pair of int per ND_DIMS slot.
+ */
+typedef struct ND_IBOX_T {
+ int min[ND_DIMS];
+ int max[ND_DIMS];
+} ND_IBOX;
+
+/*
+ * On-disk representation of the histogram emitted by ANALYZE.
+ * Every member, including the counts and the dimensionality, is a float4.
+ */
+typedef struct ND_STATS_T {
+ /* Dimensionality of the histogram. */
+ float4 ndims;
+ /* Size of n-d histogram in each dimension. */
+ float4 size[ND_DIMS];
+ /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
+ ND_BOX extent;
+ /* How many rows in the table itself? */
+ float4 table_features;
+ /* How many rows were in the sample that built this histogram? */
+ float4 sample_features;
+ /* How many not-Null/Empty features were in the sample? */
+ float4 not_null_features;
+ /* How many features actually got sampled in the histogram? */
+ float4 histogram_features;
+ /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
+ float4 histogram_cells;
+ /*
+ * How many cells did those histogram features cover? Since coverage is
+ * pro-rated, this number should now always equal histogram_features.
+ */
+ float4 cells_covered;
+ /* Variable length # of floats for histogram */
+ float4 value[1];
+} ND_STATS;
+
+/*
+ * Return the flattened index for the histogram coordinate expressed by
+ * 'indexes'. A negative result signals that one of the axes fell outside
+ * the histogram definition.
+ *
+ * The layout of the 1-d values array is
+ *   index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez
+ *
+ * Only the first (int)stats->ndims entries of 'indexes' are read. Returns
+ * -1 as soon as an entry is negative or not below the int-truncated size
+ * of its axis.
+ */
+static inline int
+nd_stats_value_index(const ND_STATS *stats, const int *indexes)
+{
+ int d;
+ int accum = 1;
+ int vdx = 0;
+
+ /* ndims and size[] are stored as float4, hence the casts to int. */
+ for (d = 0; d < (int)(stats->ndims); d++)
+ {
+ int size = (int)(stats->size[d]);
+ if (indexes[d] < 0 || indexes[d] >= size)
+ return -1;
+ /* accum is the stride of axis d: the product of all earlier axis sizes. */
+ vdx += indexes[d] * accum;
+ accum *= size;
+ }
+ return vdx;
+}
+
+/*
+ * Derive the histogram grid budget requested by PostgreSQL's ANALYZE machinery.
+ * The cell count is capped via three heuristics that take the requested
+ * attstattarget, the histogram dimensionality, and the underlying row count
+ * into account. Double precision arithmetic keeps the intermediate products in
+ * range so the cap behaves consistently across build architectures.
+ *
+ * total_rows:    row count feeding the small-relation cap
+ * ndims:         histogram dimensionality
+ * attstattarget: requested per-axis resolution; values <= 0 are treated as 1
+ *
+ * Returns the smallest of attstattarget^ndims, ndims * 100000 and
+ * 10 * ndims * total_rows, limited to INT_MAX and truncated to int.
+ * Returns 0 when ndims <= 0 or total_rows <= 0.
+ */
+static inline int
+histogram_cell_budget(double total_rows, int ndims, int attstattarget)
+{
+ double budget;
+ double dims_cap;
+ double rows_cap;
+ double attstat;
+ double dims;
+
+ if (ndims <= 0)
+ return 0;
+
+ if (attstattarget <= 0)
+ attstattarget = 1;
+
+ /* Requested resolution coming from PostgreSQL's ANALYZE knob. */
+ attstat = (double)attstattarget;
+ dims = (double)ndims;
+ budget = pow(attstat, dims);
+
+ /* Hard ceiling that keeps the statistics collector responsive. */
+ dims_cap = (double)ndims * 100000.0;
+ if (budget > dims_cap)
+ budget = dims_cap;
+
+ /* No rows means no histogram at all. */
+ if (total_rows <= 0.0)
+ return 0;
+
+ /* Small relations do not need a histogram that dwarfs the sample. */
+ rows_cap = 10.0 * (double)ndims * total_rows;
+ /* Defensive: with ndims > 0 and total_rows > 0 the product is not negative. */
+ if (rows_cap < 0.0)
+ rows_cap = 0.0;
+
+ /* Keep intermediate computations in double precision before clamping. */
+ if (rows_cap > (double)INT_MAX)
+ rows_cap = (double)INT_MAX;
+
+ if (budget > rows_cap)
+ budget = rows_cap;
+
+ /* Range-check in double so the final cast to int cannot overflow. */
+ if (budget >= (double)INT_MAX)
+ return INT_MAX;
+ if (budget <= 0.0)
+ return 0;
+
+ return (int)budget;
+}
+
+/*
+ * Compute the portion of 'target' covered by 'cover'. The caller supplies the
+ * dimensionality because ND_BOX always carries four slots. Degenerate volumes
+ * fold to zero, allowing the callers to detect slabs that ANALYZE sometimes
+ * emits for skewed datasets.
+ *
+ * Only the first 'ndims' axes are inspected. Returns 0.0 when the boxes are
+ * disjoint or merely touch on any axis, 1.0 when 'cover' contains 'target' on
+ * every axis, and otherwise the intersection volume divided by the volume of
+ * 'target' (0.0 if that volume is zero).
+ */
+static inline double
+nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
+{
+ int d;
+ bool fully_covered = true;
+ double ivol = 1.0;
+ double refvol = 1.0;
+
+ /* First pass: reject non-overlapping boxes and detect full containment. */
+ for (d = 0; d < ndims; d++)
+ {
+ /* <= and >= make boxes that share only a boundary count as disjoint. */
+ if (cover->max[d] <= target->min[d] || cover->min[d] >= target->max[d])
+ return 0.0; /* Disjoint */
+
+ if (cover->min[d] > target->min[d] || cover->max[d] < target->max[d])
+ fully_covered = false;
+ }
+
+ if (fully_covered)
+ return 1.0;
+
+ /* Second pass: accumulate the target volume and the intersection volume. */
+ for (d = 0; d < ndims; d++)
+ {
+ double width = target->max[d] - target->min[d];
+ double imin = Max(cover->min[d], target->min[d]);
+ double imax = Min(cover->max[d], target->max[d]);
+ double iwidth = Max(0.0, imax - imin);
+
+ refvol *= width;
+ ivol *= iwidth;
+ }
+
+ /* Avoid dividing by a zero reference volume. */
+ if (refvol == 0.0)
+ return refvol;
+
+ return ivol / refvol;
+}
+
+#endif /* POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H */
-----------------------------------------------------------------------
Summary of changes:
NEWS | 4 +
configure.ac | 1 +
postgis/cunit/Makefile.in | 43 +++++++
postgis/cunit/cu_tester.c | 154 ++++++++++++++++++++++++++
postgis/gserialized_estimate.c | 183 +++---------------------------
postgis/gserialized_estimate_support.h | 197 +++++++++++++++++++++++++++++++++
6 files changed, 414 insertions(+), 168 deletions(-)
create mode 100644 postgis/cunit/Makefile.in
create mode 100644 postgis/cunit/cu_tester.c
create mode 100644 postgis/gserialized_estimate_support.h
hooks/post-receive
--
PostGIS
More information about the postgis-tickets
mailing list