[SCM] PostGIS branch stable-3.5 updated. 3.5.4-5-g06fbe94ff
git at osgeo.org
git at osgeo.org
Fri Nov 7 10:47:39 PST 2025
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".
The branch, stable-3.5 has been updated
via 06fbe94ffe37c1e9d26328b7554c2050feaae1b6 (commit)
via 6b8bae5486f12dc53634f1d7982186c6413d0ef5 (commit)
from 949a623cd0afc9802ebb228d8dc7b477272b9b18 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 06fbe94ffe37c1e9d26328b7554c2050feaae1b6
Author: Darafei Praliaskouski <me at komzpa.net>
Date: Thu Oct 30 03:46:26 2025 +0400
Guard against histogram axis dimension underflow
References #5959
References #5984
diff --git a/NEWS b/NEWS
index df71a295a..075318f84 100644
--- a/NEWS
+++ b/NEWS
@@ -6,11 +6,7 @@ PostgreSQL 12-18 required. GEOS 3.8+ required. Proj 6.1+ required.
* Bug fixes *
-
-
-* Bug Fixes *
-
- - #5959, Prevent histogram target overflow when analysing massive tables (Darafei Praliaskouski)
+ - #5959, #5984, Prevent histogram target overflow when analysing massive tables (Darafei Praliaskouski)
PostGIS 3.5.4
@@ -3083,4 +3079,3 @@ PostGIS 0.1
- truely_inside()
- rtree index support functions
- gist index support functions
-
diff --git a/postgis/cunit/cu_tester.c b/postgis/cunit/cu_tester.c
index b4dd46aa7..eb6ba5b0d 100644
--- a/postgis/cunit/cu_tester.c
+++ b/postgis/cunit/cu_tester.c
@@ -28,6 +28,7 @@
#include <CUnit/Basic.h>
#include <limits.h>
+#include <math.h>
#include <string.h>
#include "../gserialized_estimate_support.h"
@@ -82,6 +83,23 @@ histogram_budget_clamps(void)
CU_ASSERT_EQUAL(histogram_cell_budget((double)INT_MAX, 50000, INT_MAX), INT_MAX);
}
+static void
+histogram_axis_allocation_guards(void)
+{
+ /* Baseline: evenly split a 10k target over two varying dimensions. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, 0.5), 100);
+
+ /* Skewed axis ratios that collapse to tiny powers still return one cell. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, 1e-9), 1);
+
+ /* Denormals, NaNs and negative ratios should not leak to the histogram. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, NAN), 1);
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, -0.5), 1);
+
+ /* Extremely aggressive ratios remain bounded by the square root of the budget. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(INT_MAX, 2, 1.0), (int)sqrt((double)INT_MAX * 2.0));
+}
+
static void
nd_stats_indexing_behaviour(void)
{
@@ -138,6 +156,7 @@ main(void)
goto cleanup;
if (!CU_add_test(suite, "histogram budget clamps", histogram_budget_clamps) ||
+ !CU_add_test(suite, "histogram axis guards", histogram_axis_allocation_guards) ||
!CU_add_test(suite, "nd_stats value index guards", nd_stats_indexing_behaviour) ||
!CU_add_test(suite, "nd_box ratio edge cases", nd_box_ratio_cases))
{
diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c
index 072d60a1c..e561fd5dd 100644
--- a/postgis/gserialized_estimate.c
+++ b/postgis/gserialized_estimate.c
@@ -1516,11 +1516,10 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
* Scale the target cells number by the # of dims and ratio,
* then take the appropriate root to get the estimated number of cells
* on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
- */
- histo_size[d] = (int)pow((double)histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
- /* If something goes awry, just give this dim one slot */
- if ( ! histo_size[d] )
- histo_size[d] = 1;
+ * The dedicated helper clamps pathological floating point inputs so we
+ * do not resurrect the NaN propagation reported in #5959 on amd64.
+ */
+ histo_size[d] = histogram_axis_cells(histo_cells_target, histo_ndims, edge_ratio);
}
histo_cells_new *= histo_size[d];
}
diff --git a/postgis/gserialized_estimate_support.h b/postgis/gserialized_estimate_support.h
index 0d3a23d75..6b372a43e 100644
--- a/postgis/gserialized_estimate_support.h
+++ b/postgis/gserialized_estimate_support.h
@@ -151,6 +151,46 @@ histogram_cell_budget(double total_rows, int ndims, int attstattarget)
return (int)budget;
}
+/*
+ * Allocate histogram buckets along a single axis in proportion to the observed
+ * density variation. The caller passes in the global histogram target along
+ * with the number of axes that exhibited variation in the sampled data and the
+ * relative contribution of the current axis (edge_ratio). Earlier versions
+ * evaluated the pow() call directly in the caller, which exposed the planner to
+ * NaN propagation on some amd64 builds when the ratio was denormal or negative
+ * (see #5959). Keeping the calculation in one place allows us to clamp the
+ * inputs and provide a predictable fallback for problematic floating point
+ * combinations.
+ */
+static inline int
+histogram_axis_cells(int histo_cells_target, int histo_ndims, double edge_ratio)
+{
+ double scaled;
+ double axis_cells;
+
+ if (histo_cells_target <= 0 || histo_ndims <= 0)
+ return 1;
+
+ if (!(edge_ratio > 0.0) || !isfinite(edge_ratio))
+ return 1;
+
+ scaled = (double)histo_cells_target * (double)histo_ndims * edge_ratio;
+ if (!(scaled > 0.0) || !isfinite(scaled))
+ return 1;
+
+ axis_cells = pow(scaled, 1.0 / (double)histo_ndims);
+ if (!(axis_cells > 0.0) || !isfinite(axis_cells))
+ return 1;
+
+ if (axis_cells >= (double)INT_MAX)
+ return INT_MAX;
+
+ if (axis_cells <= 1.0)
+ return 1;
+
+ return (int)axis_cells;
+}
+
/*
* Compute the portion of 'target' covered by 'cover'. The caller supplies the
* dimensionality because ND_BOX always carries four slots. Degenerate volumes
commit 6b8bae5486f12dc53634f1d7982186c6413d0ef5
Author: Darafei Praliaskouski <me at komzpa.net>
Date: Thu Oct 30 02:55:59 2025 +0400
Prevent histogram target overflow when analysing massive tables
Add CUnit tests for overflow scenarios
Closes #5959
diff --git a/NEWS b/NEWS
index 72aaf5e0d..df71a295a 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,10 @@ PostgreSQL 12-18 required. GEOS 3.8+ required. Proj 6.1+ required.
+* Bug Fixes *
+
+ - #5959, Prevent histogram target overflow when analysing massive tables (Darafei Praliaskouski)
+
PostGIS 3.5.4
2025/10/16
diff --git a/configure.ac b/configure.ac
index a60828c9d..0411756d7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1905,6 +1905,7 @@ AC_CONFIG_FILES([GNUmakefile
libpgcommon/Makefile
libpgcommon/cunit/Makefile
postgis/Makefile
+ postgis/cunit/Makefile
postgis/sqldefines.h
sfcgal/Makefile
$SFCGAL_MAKEFILE_LIST
diff --git a/postgis/cunit/Makefile.in b/postgis/cunit/Makefile.in
new file mode 100644
index 000000000..483e4ca10
--- /dev/null
+++ b/postgis/cunit/Makefile.in
@@ -0,0 +1,43 @@
+# **********************************************************************
+# *
+# * PostGIS - Spatial Types for PostgreSQL
+# * http://postgis.net
+# *
+# * Copyright 2025 Darafei Praliaskouski <me at komzpa.net>
+# *
+# * This is free software; you can redistribute and/or modify it under
+# * the terms of the GNU General Public Licence. See the COPYING file.
+# *
+# **********************************************************************
+
+srcdir = @srcdir@
+top_builddir = @top_builddir@
+
+CC=@CC@
+LIBTOOL=@LIBTOOL@
+CFLAGS = @CFLAGS@ @CPPFLAGS@ @PGSQL_BE_CPPFLAGS@ @CUNIT_CPPFLAGS@ -I.. -I$(top_builddir) -I@top_srcdir@/liblwgeom -I@top_builddir@/liblwgeom -I@top_srcdir@/libpgcommon -I@top_builddir@/libpgcommon
+LDFLAGS = @CUNIT_LDFLAGS@ -lm
+
+VPATH = $(srcdir)
+
+OBJS = cu_tester.o
+
+# Build the standalone histogram helper tester.
+all: cu_tester
+
+# Execute the suite directly; no installation step is required.
+check: all
+ $(LIBTOOL) --mode=execute ./cu_tester
+
+# Link the tester with libtool; all helper code is header-only.
+cu_tester: $(OBJS)
+ $(LIBTOOL) --mode=link $(CC) $(CFLAGS) -o $@ $(OBJS) $(LDFLAGS)
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+clean:
+ rm -f $(OBJS) cu_tester
+
+clobber distclean: clean
+ rm -f Makefile
diff --git a/postgis/cunit/cu_tester.c b/postgis/cunit/cu_tester.c
new file mode 100644
index 000000000..b4dd46aa7
--- /dev/null
+++ b/postgis/cunit/cu_tester.c
@@ -0,0 +1,154 @@
+/**********************************************************************
+ *
+ * PostGIS - Spatial Types for PostgreSQL
+ * http://postgis.net
+ *
+ * This file is part of PostGIS
+ *
+ * PostGIS is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * PostGIS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ **********************************************************************
+ *
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
+ *
+ **********************************************************************/
+
+#include "postgres.h"
+
+#include <CUnit/Basic.h>
+#include <limits.h>
+#include <string.h>
+
+#include "../gserialized_estimate_support.h"
+
+static ND_BOX
+make_box(float minx, float miny, float minz, float minm, float maxx, float maxy, float maxz, float maxm)
+{
+ ND_BOX box;
+
+ memset(&box, 0, sizeof(box));
+ box.min[0] = minx;
+ box.min[1] = miny;
+ box.min[2] = minz;
+ box.min[3] = minm;
+ box.max[0] = maxx;
+ box.max[1] = maxy;
+ box.max[2] = maxz;
+ box.max[3] = maxm;
+ return box;
+}
+
+static void
+histogram_budget_clamps(void)
+{
+ /* Zero or negative row counts disable histogram construction. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(0.0, 2, 100), 0);
+ CU_ASSERT_EQUAL(histogram_cell_budget(-1.0, 4, 100), 0);
+
+ /* Degenerate dimensionality cannot allocate histogram space. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1000.0, 0, 100), 0);
+
+ /* Matches the classic pow(attstattarget, ndims) path. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 2, 100), 10000);
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 3, 50), 125000);
+
+ /* attstattarget^ndims exceeds ndims * 100000 and must be clamped. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 4, 50), 400000);
+
+ /* attstattarget<=0 is normalised to the smallest viable target. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 2, 0), 1);
+
+ /* Row clamp shrinks the grid for small relations. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1.0, 2, 100), 20);
+
+ /* Large tables now preserve the dimensional cap instead of overflowing. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1.5e8, 2, 100), 10000);
+
+ /* Regression for #5984: huge attstat targets stabilise instead of wrapping. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(5e6, 2, 10000), 200000);
+
+ /* Trigger the INT_MAX guard once both other caps exceed it. */
+ CU_ASSERT_EQUAL(histogram_cell_budget((double)INT_MAX, 50000, INT_MAX), INT_MAX);
+}
+
+static void
+nd_stats_indexing_behaviour(void)
+{
+ ND_STATS stats;
+ const int good_index[ND_DIMS] = {1, 2, 0, 0};
+ const int bad_index[ND_DIMS] = {1, 5, 0, 0};
+
+ memset(&stats, 0, sizeof(stats));
+ stats.ndims = 3;
+ stats.size[0] = 4.0f;
+ stats.size[1] = 5.0f;
+ stats.size[2] = 3.0f;
+
+ /* Three-dimensional index (x=1, y=2, z=0) collapses into 1 + 2 * 4. */
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, good_index), 1 + 2 * 4);
+ /* Any request outside the histogram bounds triggers a guard. */
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, bad_index), -1);
+
+ /* Regression for #5959: ndims higher than populated sizes still honours guards. */
+ stats.ndims = 4;
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, good_index), -1);
+}
+
+static void
+nd_box_ratio_cases(void)
+{
+ ND_BOX covering = make_box(0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 2.0f, 2.0f, 0.0f);
+ ND_BOX interior = make_box(0.5f, 0.5f, 0.5f, 0.0f, 1.5f, 1.5f, 1.5f, 0.0f);
+ ND_BOX partial = make_box(0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.5f, 0.5f, 0.0f);
+ ND_BOX target = make_box(0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f);
+ ND_BOX flat = make_box(0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
+ ND_BOX touch = make_box(2.0f, 0.0f, 0.0f, 0.0f, 3.0f, 1.0f, 1.0f, 0.0f);
+
+ /* Full coverage should evaluate to one regardless of the extra extent. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &interior, 3), 1.0, 1e-12);
+ /* A shared octant carries one eighth of the reference volume. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&partial, &target, 3), 0.125, 1e-12);
+ /* Degenerate slabs have zero volume in three dimensions. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &flat, 3), 0.0, 1e-12);
+ /* Boxes that only touch along a face should not count as overlap. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &touch, 3), 0.0, 1e-12);
+}
+
+int
+main(void)
+{
+ CU_pSuite suite;
+ unsigned int failures = 0;
+ if (CU_initialize_registry() != CUE_SUCCESS)
+ return CU_get_error();
+
+ suite = CU_add_suite("gserialized_histogram_helpers", NULL, NULL);
+ if (!suite)
+ goto cleanup;
+
+ if (!CU_add_test(suite, "histogram budget clamps", histogram_budget_clamps) ||
+ !CU_add_test(suite, "nd_stats value index guards", nd_stats_indexing_behaviour) ||
+ !CU_add_test(suite, "nd_box ratio edge cases", nd_box_ratio_cases))
+ {
+ goto cleanup;
+ }
+
+ CU_basic_set_mode(CU_BRM_VERBOSE);
+ CU_basic_run_tests();
+
+cleanup:
+ failures = CU_get_number_of_tests_failed();
+ CU_cleanup_registry();
+ return failures == 0 ? CUE_SUCCESS : 1;
+}
diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c
index 24984e9d7..072d60a1c 100644
--- a/postgis/gserialized_estimate.c
+++ b/postgis/gserialized_estimate.c
@@ -19,11 +19,10 @@
**********************************************************************
*
* Copyright 2012 (C) Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
*
**********************************************************************/
-
-
/**********************************************************************
THEORY OF OPERATION
@@ -112,10 +111,12 @@ dimensionality cases. (2D geometry) &&& (3D column), etc.
#include "stringbuffer.h"
#include "liblwgeom.h"
#include "lwgeodetic.h"
-#include "lwgeom_pg.h" /* For debugging macros. */
+#include "lwgeom_pg.h" /* For debugging macros. */
#include "gserialized_gist.h" /* For index common functions */
+#include "gserialized_estimate_support.h"
#include <math.h>
+#include <limits.h>
#if HAVE_IEEEFP_H
#include <ieeefp.h>
#endif
@@ -144,8 +145,7 @@ Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
/* Local prototypes */
static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum);
-static GBOX * spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
-
+static GBOX *spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
/* Other prototypes */
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
@@ -186,13 +186,6 @@ Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
*/
#define SDFACTOR 3.25
-/**
-* The maximum number of dimensions our code can handle.
-* We'll use this to statically allocate a bunch of
-* arrays below.
-*/
-#define ND_DIMS 4
-
/**
* Minimum width of a dimension that we'll bother trying to
* compute statistics on. Bearing in mind we have no control
@@ -219,68 +212,6 @@ Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
#define FALLBACK_ND_SEL 0.2
#define FALLBACK_ND_JOINSEL 0.3
-/**
-* N-dimensional box type for calculations, to avoid doing
-* explicit axis conversions from GBOX in all calculations
-* at every step.
-*/
-typedef struct ND_BOX_T
-{
- float4 min[ND_DIMS];
- float4 max[ND_DIMS];
-} ND_BOX;
-
-/**
-* N-dimensional box index type
-*/
-typedef struct ND_IBOX_T
-{
- int min[ND_DIMS];
- int max[ND_DIMS];
-} ND_IBOX;
-
-
-/**
-* N-dimensional statistics structure. Well, actually
-* four-dimensional, but set up to handle arbitrary dimensions
-* if necessary (really, we just want to get the 2,3,4-d cases
-* into one shared piece of code).
-*/
-typedef struct ND_STATS_T
-{
- /* Dimensionality of the histogram. */
- float4 ndims;
-
- /* Size of n-d histogram in each dimension. */
- float4 size[ND_DIMS];
-
- /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
- ND_BOX extent;
-
- /* How many rows in the table itself? */
- float4 table_features;
-
- /* How many rows were in the sample that built this histogram? */
- float4 sample_features;
-
- /* How many not-Null/Empty features were in the sample? */
- float4 not_null_features;
-
- /* How many features actually got sampled in the histogram? */
- float4 histogram_features;
-
- /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
- float4 histogram_cells;
-
- /* How many cells did those histogram features cover? */
- /* Since we are pro-rating coverage, this number should */
- /* now always equal histogram_features */
- float4 cells_covered;
-
- /* Variable length # of floats for histogram */
- float4 value[1];
-} ND_STATS;
-
typedef struct {
/* Saved state from std_typanalyze() */
AnalyzeAttrComputeStatsFunc std_compute_stats;
@@ -318,13 +249,12 @@ text_p_get_mode(const text *txt)
char *modestr;
if (VARSIZE_ANY_EXHDR(txt) <= 0)
return mode;
- modestr = (char*)VARDATA(txt);
- if ( modestr[0] == 'N' )
+ modestr = (char *)VARDATA(txt);
+ if (modestr[0] == 'N')
mode = 0;
return mode;
}
-
/**
* Integer comparison function for qsort
*/
@@ -372,7 +302,7 @@ total_double(const double *vals, int nvals)
int i;
float total = 0;
/* Calculate total */
- for ( i = 0; i < nvals; i++ )
+ for (i = 0; i < nvals; i++)
total += vals[i];
return total;
@@ -425,33 +355,6 @@ stddev(const int *vals, int nvals)
}
#endif /* POSTGIS_DEBUG_LEVEL >= 3 */
-/**
-* Given a position in the n-d histogram (i,j,k) return the
-* position in the 1-d values array.
-*/
-static int
-nd_stats_value_index(const ND_STATS *stats, int *indexes)
-{
- int d;
- int accum = 1, vdx = 0;
-
- /* Calculate the index into the 1-d values array that the (i,j,k,l) */
- /* n-d histogram coordinate implies. */
- /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
- for ( d = 0; d < (int)(stats->ndims); d++ )
- {
- int size = (int)(stats->size[d]);
- if ( indexes[d] < 0 || indexes[d] >= size )
- {
- POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
- return -1;
- }
- vdx += indexes[d] * accum;
- accum *= size;
- }
- return vdx;
-}
-
/**
* Convert an #ND_BOX to a JSON string for printing
*/
@@ -722,50 +625,6 @@ nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
return true;
}
-/**
-* Returns the proportion of b2 that is covered by b1.
-*/
-static inline double
-nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
-{
- int d;
- bool covered = true;
- double ivol = 1.0;
- double vol2 = 1.0;
-
- for ( d = 0 ; d < ndims; d++ )
- {
- if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
- return 0.0; /* Disjoint */
-
- if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
- covered = false;
- }
-
- if ( covered )
- return 1.0;
-
- for ( d = 0; d < ndims; d++ )
- {
- double width2 = b2->max[d] - b2->min[d];
- double imin, imax, iwidth;
-
- vol2 *= width2;
-
- imin = Max(b1->min[d], b2->min[d]);
- imax = Min(b1->max[d], b2->max[d]);
- iwidth = imax - imin;
- iwidth = Max(0.0, iwidth);
-
- ivol *= iwidth;
- }
-
- if ( vol2 == 0.0 )
- return vol2;
-
- return ivol / vol2;
-}
-
/* How many bins shall we use in figuring out the distribution? */
#define MAX_NUM_BINS 50
#define BIN_MIN_SIZE 10
@@ -894,9 +753,9 @@ nd_increment(ND_IBOX *ibox, int ndims, int *counter)
{
int d = 0;
- while ( d < ndims )
+ while (d < ndims)
{
- if ( counter[d] < ibox->max[d] )
+ if (counter[d] < ibox->max[d])
{
counter[d] += 1;
break;
@@ -905,7 +764,7 @@ nd_increment(ND_IBOX *ibox, int ndims, int *counter)
d++;
}
/* That's it, cannot increment any more! */
- if ( d == ndims )
+ if (d == ndims)
return false;
/* Increment complete! */
@@ -1321,9 +1180,9 @@ gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, i
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0);
/* Oid operator = PG_GETARG_OID(1); */
- List *args = (List *) PG_GETARG_POINTER(2);
+ List *args = (List *)PG_GETARG_POINTER(2);
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
int mode = PG_GETARG_INT32(4);
@@ -1512,22 +1371,13 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
#endif
}
- /*
- * We'll build a histogram having stats->attr->attstattarget
- * (default 100) cells on each side, within reason...
- * we'll use ndims*100000 as the maximum number of cells.
- * Also, if we're sampling a relatively small table, we'll try to ensure that
- * we have a smaller grid.
- */
#if POSTGIS_PGSQL_VERSION >= 170
- histo_cells_target = (int)pow((double)(stats->attstattarget), (double)ndims);
POSTGIS_DEBUGF(3, " stats->attstattarget: %d", stats->attstattarget);
+ histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attstattarget);
#else
- histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
+ histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attr->attstattarget);
#endif
- histo_cells_target = Min(histo_cells_target, ndims * 100000);
- histo_cells_target = Min(histo_cells_target, (int)(10 * ndims * total_rows));
POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
/* If there's no useful features, we can't work out stats */
@@ -1836,8 +1686,6 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
return;
}
-
-
/**
* In order to do useful selectivity calculations in both 2-D and N-D
* modes, we actually have to generate two stats objects, one for 2-D
@@ -1875,7 +1723,6 @@ compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
}
}
-
/**
* This function will be called when the ANALYZE command is run
* on a column of the "geometry" or "geography" type.
diff --git a/postgis/gserialized_estimate_support.h b/postgis/gserialized_estimate_support.h
new file mode 100644
index 000000000..0d3a23d75
--- /dev/null
+++ b/postgis/gserialized_estimate_support.h
@@ -0,0 +1,197 @@
+/**********************************************************************
+ *
+ * PostGIS - Spatial Types for PostgreSQL
+ * http://postgis.net
+ *
+ * This file is part of PostGIS
+ *
+ * PostGIS is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * PostGIS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ **********************************************************************
+ *
+ * Internal helpers shared between the gserialized selectivity
+ * implementation and the unit tests.
+ *
+ * Keeping the routines header-only ensures the planner code and the
+ * harness evaluate the exact same floating-point flows without the
+ * cross-object plumbing that previously complicated maintenance.
+ * Nothing here is installed; the header is meant for
+ * gserialized_estimate.c and for the dedicated CUnit suite only.
+ *
+ **********************************************************************
+ *
+ * Copyright 2012 (C) Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
+ *
+ **********************************************************************/
+
+#ifndef POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
+#define POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+/* The maximum number of dimensions our statistics code supports. */
+#define ND_DIMS 4
+
+/* Lightweight n-dimensional box representation for selectivity math. */
+typedef struct ND_BOX_T {
+ float4 min[ND_DIMS];
+ float4 max[ND_DIMS];
+} ND_BOX;
+
+/* Integer counterpart used for histogram cell iteration. */
+typedef struct ND_IBOX_T {
+ int min[ND_DIMS];
+ int max[ND_DIMS];
+} ND_IBOX;
+
+/* On-disk representation of the histogram emitted by ANALYZE. */
+typedef struct ND_STATS_T {
+ float4 ndims;
+ float4 size[ND_DIMS];
+ ND_BOX extent;
+ float4 table_features;
+ float4 sample_features;
+ float4 not_null_features;
+ float4 histogram_features;
+ float4 histogram_cells;
+ float4 cells_covered;
+ float4 value[1];
+} ND_STATS;
+
+/*
+ * Return the flattened index for the histogram coordinate expressed by
+ * 'indexes'. A negative result signals that one of the axes fell outside
+ * the histogram definition.
+ */
+static inline int
+nd_stats_value_index(const ND_STATS *stats, const int *indexes)
+{
+ int d;
+ int accum = 1;
+ int vdx = 0;
+
+ for (d = 0; d < (int)(stats->ndims); d++)
+ {
+ int size = (int)(stats->size[d]);
+ if (indexes[d] < 0 || indexes[d] >= size)
+ return -1;
+ vdx += indexes[d] * accum;
+ accum *= size;
+ }
+ return vdx;
+}
+
+/*
+ * Derive the histogram grid budget requested by PostgreSQL's ANALYZE machinery.
+ * The planner caps the cell count via three heuristics that take the requested
+ * attstattarget, the histogram dimensionality, and the underlying row count
+ * into account. Double precision arithmetic keeps the intermediate products in
+ * range so the cap behaves consistently across build architectures.
+ */
+static inline int
+histogram_cell_budget(double total_rows, int ndims, int attstattarget)
+{
+ double budget;
+ double dims_cap;
+ double rows_cap;
+ double attstat;
+ double dims;
+
+ if (ndims <= 0)
+ return 0;
+
+ if (attstattarget <= 0)
+ attstattarget = 1;
+
+ /* Requested resolution coming from PostgreSQL's ANALYZE knob. */
+ attstat = (double)attstattarget;
+ dims = (double)ndims;
+ budget = pow(attstat, dims);
+
+ /* Hard ceiling that keeps the statistics collector responsive. */
+ dims_cap = (double)ndims * 100000.0;
+ if (budget > dims_cap)
+ budget = dims_cap;
+
+ /* Small relations do not need a histogram that dwarfs the sample. */
+ if (total_rows <= 0.0)
+ return 0;
+
+ rows_cap = 10.0 * (double)ndims * total_rows;
+ if (rows_cap < 0.0)
+ rows_cap = 0.0;
+
+ /* Keep intermediate computations in double precision before clamping. */
+ if (rows_cap > (double)INT_MAX)
+ rows_cap = (double)INT_MAX;
+
+ if (budget > rows_cap)
+ budget = rows_cap;
+
+ if (budget >= (double)INT_MAX)
+ return INT_MAX;
+ if (budget <= 0.0)
+ return 0;
+
+ return (int)budget;
+}
+
+/*
+ * Compute the portion of 'target' covered by 'cover'. The caller supplies the
+ * dimensionality because ND_BOX always carries four slots. Degenerate volumes
+ * fold to zero, allowing the callers to detect slabs that ANALYZE sometimes
+ * emits for skewed datasets.
+ */
+static inline double
+nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
+{
+ int d;
+ bool fully_covered = true;
+ double ivol = 1.0;
+ double refvol = 1.0;
+
+ for (d = 0; d < ndims; d++)
+ {
+ if (cover->max[d] <= target->min[d] || cover->min[d] >= target->max[d])
+ return 0.0; /* Disjoint */
+
+ if (cover->min[d] > target->min[d] || cover->max[d] < target->max[d])
+ fully_covered = false;
+ }
+
+ if (fully_covered)
+ return 1.0;
+
+ for (d = 0; d < ndims; d++)
+ {
+ double width = target->max[d] - target->min[d];
+ double imin = Max(cover->min[d], target->min[d]);
+ double imax = Min(cover->max[d], target->max[d]);
+ double iwidth = Max(0.0, imax - imin);
+
+ refvol *= width;
+ ivol *= iwidth;
+ }
+
+ if (refvol == 0.0)
+ return refvol;
+
+ return ivol / refvol;
+}
+
+#endif /* POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H */
-----------------------------------------------------------------------
Summary of changes:
NEWS | 3 +-
configure.ac | 1 +
postgis/cunit/Makefile.in | 43 ++++++
postgis/cunit/cu_tester.c | 173 ++++++++++++++++++++++++
postgis/gserialized_estimate.c | 192 +++-----------------------
postgis/gserialized_estimate_support.h | 237 +++++++++++++++++++++++++++++++++
6 files changed, 474 insertions(+), 175 deletions(-)
create mode 100644 postgis/cunit/Makefile.in
create mode 100644 postgis/cunit/cu_tester.c
create mode 100644 postgis/gserialized_estimate_support.h
hooks/post-receive
--
PostGIS
More information about the postgis-tickets
mailing list