[SCM] PostGIS branch stable-3.6 updated. 3.6.0-27-g21517f662
git at osgeo.org
git at osgeo.org
Fri Nov 7 10:35:17 PST 2025
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".
The branch, stable-3.6 has been updated
via 21517f662368c90bbee0dfdebff3cb82059467fd (commit)
via a518412e516d4c7b216fd2f2797b20495d095e44 (commit)
via b950728171f5d26fc3bbdd2f1cdf023cbffb0169 (commit)
from 58b4e2571588b4aace175431f2ff33445c230ee4 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 21517f662368c90bbee0dfdebff3cb82059467fd
Author: Darafei Praliaskouski <me at komzpa.net>
Date: Fri Nov 7 22:34:25 2025 +0400
Touch up NEWS
Closes #5984
diff --git a/NEWS b/NEWS
index b7fc26db9..7a06e124f 100644
--- a/NEWS
+++ b/NEWS
@@ -14,10 +14,7 @@ PostGIS 3.6.1
by extension are owned by extension
authored: Andrey Borodin (Yandex), reported by Sergey Bobrov (Kaspersky)
- #5754, ST_ForcePolygonCCW reverses lines (Paul Ramsey)
-
-* Bug Fixes *
-
- - #5959, Prevent histogram target overflow when analysing massive tables (Darafei Praliaskouski)
+ - #5959, #5984, Prevent histogram target overflow when analysing massive tables (Darafei Praliaskouski)
PostGIS 3.6.0
commit a518412e516d4c7b216fd2f2797b20495d095e44
Author: Darafei Praliaskouski <me at komzpa.net>
Date: Thu Oct 30 03:46:26 2025 +0400
Guard against histogram axis dimension underflow
References #5959
References #5984
diff --git a/postgis/cunit/cu_tester.c b/postgis/cunit/cu_tester.c
index b4dd46aa7..eb6ba5b0d 100644
--- a/postgis/cunit/cu_tester.c
+++ b/postgis/cunit/cu_tester.c
@@ -28,6 +28,7 @@
#include <CUnit/Basic.h>
#include <limits.h>
+#include <math.h>
#include <string.h>
#include "../gserialized_estimate_support.h"
@@ -82,6 +83,23 @@ histogram_budget_clamps(void)
CU_ASSERT_EQUAL(histogram_cell_budget((double)INT_MAX, 50000, INT_MAX), INT_MAX);
}
+static void
+histogram_axis_allocation_guards(void)
+{
+ /* Baseline: evenly split a 10k target over two varying dimensions. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, 0.5), 100);
+
+ /* Skewed axis ratios that collapse to tiny powers still return one cell. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, 1e-9), 1);
+
+ /* Denormals, NaNs and negative ratios should not leak to the histogram. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, NAN), 1);
+ CU_ASSERT_EQUAL(histogram_axis_cells(10000, 2, -0.5), 1);
+
+ /* Extremely aggressive ratios remain bounded by the square root of the budget. */
+ CU_ASSERT_EQUAL(histogram_axis_cells(INT_MAX, 2, 1.0), (int)sqrt((double)INT_MAX * 2.0));
+}
+
static void
nd_stats_indexing_behaviour(void)
{
@@ -138,6 +156,7 @@ main(void)
goto cleanup;
if (!CU_add_test(suite, "histogram budget clamps", histogram_budget_clamps) ||
+ !CU_add_test(suite, "histogram axis guards", histogram_axis_allocation_guards) ||
!CU_add_test(suite, "nd_stats value index guards", nd_stats_indexing_behaviour) ||
!CU_add_test(suite, "nd_box ratio edge cases", nd_box_ratio_cases))
{
diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c
index ea887ecb6..54adae679 100644
--- a/postgis/gserialized_estimate.c
+++ b/postgis/gserialized_estimate.c
@@ -1516,11 +1516,10 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
* Scale the target cells number by the # of dims and ratio,
* then take the appropriate root to get the estimated number of cells
* on this axis (eg, pow(0.5) for 2d, pow(0.333) for 3d, pow(0.25) for 4d)
- */
- histo_size[d] = (int)pow((double)histo_cells_target * histo_ndims * edge_ratio, 1/(double)histo_ndims);
- /* If something goes awry, just give this dim one slot */
- if ( ! histo_size[d] )
- histo_size[d] = 1;
+ * The dedicated helper clamps pathological floating point inputs so we
+ * do not resurrect the NaN propagation reported in #5959 on amd64.
+ */
+ histo_size[d] = histogram_axis_cells(histo_cells_target, histo_ndims, edge_ratio);
}
histo_cells_new *= histo_size[d];
}
diff --git a/postgis/gserialized_estimate_support.h b/postgis/gserialized_estimate_support.h
index 0d3a23d75..6b372a43e 100644
--- a/postgis/gserialized_estimate_support.h
+++ b/postgis/gserialized_estimate_support.h
@@ -151,6 +151,46 @@ histogram_cell_budget(double total_rows, int ndims, int attstattarget)
return (int)budget;
}
+/*
+ * Allocate histogram buckets along a single axis in proportion to the observed
+ * density variation. The caller passes in the global histogram target along
+ * with the number of axes that exhibited variation in the sampled data and the
+ * relative contribution of the current axis (edge_ratio). Earlier versions
+ * evaluated the pow() call directly in the caller, which exposed the planner to
+ * NaN propagation on some amd64 builds when the ratio was denormal or negative
+ * (see #5959). Keeping the calculation in one place allows us to clamp the
+ * inputs and provide a predictable fallback for problematic floating point
+ * combinations.
+ */
+static inline int
+histogram_axis_cells(int histo_cells_target, int histo_ndims, double edge_ratio)
+{
+ double scaled;
+ double axis_cells;
+
+ if (histo_cells_target <= 0 || histo_ndims <= 0)
+ return 1;
+
+ if (!(edge_ratio > 0.0) || !isfinite(edge_ratio))
+ return 1;
+
+ scaled = (double)histo_cells_target * (double)histo_ndims * edge_ratio;
+ if (!(scaled > 0.0) || !isfinite(scaled))
+ return 1;
+
+ axis_cells = pow(scaled, 1.0 / (double)histo_ndims);
+ if (!(axis_cells > 0.0) || !isfinite(axis_cells))
+ return 1;
+
+ if (axis_cells >= (double)INT_MAX)
+ return INT_MAX;
+
+ if (axis_cells <= 1.0)
+ return 1;
+
+ return (int)axis_cells;
+}
+
/*
* Compute the portion of 'target' covered by 'cover'. The caller supplies the
* dimensionality because ND_BOX always carries four slots. Degenerate volumes
commit b950728171f5d26fc3bbdd2f1cdf023cbffb0169
Author: Darafei Praliaskouski <me at komzpa.net>
Date: Thu Oct 30 02:55:59 2025 +0400
Prevent histogram target overflow when analysing massive tables
Add CUnit tests for overflow scenarios
Closes #5959
diff --git a/NEWS b/NEWS
index 4dec87f2a..b7fc26db9 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,10 @@ PostGIS 3.6.1
authored: Andrey Borodin (Yandex), reported by Sergey Bobrov (Kaspersky)
- #5754, ST_ForcePolygonCCW reverses lines (Paul Ramsey)
+* Bug Fixes *
+
+ - #5959, Prevent histogram target overflow when analysing massive tables (Darafei Praliaskouski)
+
PostGIS 3.6.0
2025/09/01
diff --git a/configure.ac b/configure.ac
index dbd4f59c3..12e26a06b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1927,6 +1927,7 @@ AC_CONFIG_FILES([GNUmakefile
libpgcommon/Makefile
libpgcommon/cunit/Makefile
postgis/Makefile
+ postgis/cunit/Makefile
postgis/sqldefines.h
sfcgal/Makefile
$SFCGAL_MAKEFILE_LIST
diff --git a/postgis/cunit/Makefile.in b/postgis/cunit/Makefile.in
new file mode 100644
index 000000000..483e4ca10
--- /dev/null
+++ b/postgis/cunit/Makefile.in
@@ -0,0 +1,43 @@
+# **********************************************************************
+# *
+# * PostGIS - Spatial Types for PostgreSQL
+# * http://postgis.net
+# *
+# * Copyright 2025 Darafei Praliaskouski <me at komzpa.net>
+# *
+# * This is free software; you can redistribute and/or modify it under
+# * the terms of the GNU General Public Licence. See the COPYING file.
+# *
+# **********************************************************************
+
+srcdir = @srcdir@
+top_builddir = @top_builddir@
+
+CC=@CC@
+LIBTOOL=@LIBTOOL@
+CFLAGS = @CFLAGS@ @CPPFLAGS@ @PGSQL_BE_CPPFLAGS@ @CUNIT_CPPFLAGS@ -I.. -I$(top_builddir) -I@top_srcdir@/liblwgeom -I@top_builddir@/liblwgeom -I@top_srcdir@/libpgcommon -I@top_builddir@/libpgcommon
+LDFLAGS = @CUNIT_LDFLAGS@ -lm
+
+VPATH = $(srcdir)
+
+OBJS = cu_tester.o
+
+# Build the standalone histogram helper tester.
+all: cu_tester
+
+# Execute the suite directly; no installation step is required.
+check: all
+ $(LIBTOOL) --mode=execute ./cu_tester
+
+# Link the tester with libtool; all helper code is header-only.
+cu_tester: $(OBJS)
+ $(LIBTOOL) --mode=link $(CC) $(CFLAGS) -o $@ $(OBJS) $(LDFLAGS)
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+clean:
+ rm -f $(OBJS) cu_tester
+
+clobber distclean: clean
+ rm -f Makefile
diff --git a/postgis/cunit/cu_tester.c b/postgis/cunit/cu_tester.c
new file mode 100644
index 000000000..b4dd46aa7
--- /dev/null
+++ b/postgis/cunit/cu_tester.c
@@ -0,0 +1,154 @@
+/**********************************************************************
+ *
+ * PostGIS - Spatial Types for PostgreSQL
+ * http://postgis.net
+ *
+ * This file is part of PostGIS
+ *
+ * PostGIS is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * PostGIS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ **********************************************************************
+ *
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
+ *
+ **********************************************************************/
+
+#include "postgres.h"
+
+#include <CUnit/Basic.h>
+#include <limits.h>
+#include <string.h>
+
+#include "../gserialized_estimate_support.h"
+
+static ND_BOX
+make_box(float minx, float miny, float minz, float minm, float maxx, float maxy, float maxz, float maxm)
+{
+ ND_BOX box;
+
+ memset(&box, 0, sizeof(box));
+ box.min[0] = minx;
+ box.min[1] = miny;
+ box.min[2] = minz;
+ box.min[3] = minm;
+ box.max[0] = maxx;
+ box.max[1] = maxy;
+ box.max[2] = maxz;
+ box.max[3] = maxm;
+ return box;
+}
+
+static void
+histogram_budget_clamps(void)
+{
+ /* Zero or negative row counts disable histogram construction. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(0.0, 2, 100), 0);
+ CU_ASSERT_EQUAL(histogram_cell_budget(-1.0, 4, 100), 0);
+
+ /* Degenerate dimensionality cannot allocate histogram space. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1000.0, 0, 100), 0);
+
+ /* Matches the classic pow(attstattarget, ndims) path. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 2, 100), 10000);
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 3, 50), 125000);
+
+ /* attstattarget^ndims exceeds ndims * 100000 and must be clamped. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 4, 50), 400000);
+
+ /* attstattarget<=0 is normalised to the smallest viable target. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1e6, 2, 0), 1);
+
+ /* Row clamp shrinks the grid for small relations. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1.0, 2, 100), 20);
+
+ /* Large tables now preserve the dimensional cap instead of overflowing. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(1.5e8, 2, 100), 10000);
+
+ /* Regression for #5984: huge attstat targets stabilise instead of wrapping. */
+ CU_ASSERT_EQUAL(histogram_cell_budget(5e6, 2, 10000), 200000);
+
+ /* Trigger the INT_MAX guard once both other caps exceed it. */
+ CU_ASSERT_EQUAL(histogram_cell_budget((double)INT_MAX, 50000, INT_MAX), INT_MAX);
+}
+
+static void
+nd_stats_indexing_behaviour(void)
+{
+ ND_STATS stats;
+ const int good_index[ND_DIMS] = {1, 2, 0, 0};
+ const int bad_index[ND_DIMS] = {1, 5, 0, 0};
+
+ memset(&stats, 0, sizeof(stats));
+ stats.ndims = 3;
+ stats.size[0] = 4.0f;
+ stats.size[1] = 5.0f;
+ stats.size[2] = 3.0f;
+
+ /* Three-dimensional index (x=1, y=2, z=0) collapses into 1 + 2 * 4. */
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, good_index), 1 + 2 * 4);
+ /* Any request outside the histogram bounds triggers a guard. */
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, bad_index), -1);
+
+ /* Regression for #5959: ndims higher than populated sizes still honours guards. */
+ stats.ndims = 4;
+ CU_ASSERT_EQUAL(nd_stats_value_index(&stats, good_index), -1);
+}
+
+static void
+nd_box_ratio_cases(void)
+{
+ ND_BOX covering = make_box(0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 2.0f, 2.0f, 0.0f);
+ ND_BOX interior = make_box(0.5f, 0.5f, 0.5f, 0.0f, 1.5f, 1.5f, 1.5f, 0.0f);
+ ND_BOX partial = make_box(0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.5f, 0.5f, 0.0f);
+ ND_BOX target = make_box(0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f);
+ ND_BOX flat = make_box(0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
+ ND_BOX touch = make_box(2.0f, 0.0f, 0.0f, 0.0f, 3.0f, 1.0f, 1.0f, 0.0f);
+
+ /* Full coverage should evaluate to one regardless of the extra extent. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &interior, 3), 1.0, 1e-12);
+ /* A shared octant carries one eighth of the reference volume. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&partial, &target, 3), 0.125, 1e-12);
+ /* Degenerate slabs have zero volume in three dimensions. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &flat, 3), 0.0, 1e-12);
+ /* Boxes that only touch along a face should not count as overlap. */
+ CU_ASSERT_DOUBLE_EQUAL(nd_box_ratio(&covering, &touch, 3), 0.0, 1e-12);
+}
+
+int
+main(void)
+{
+ CU_pSuite suite;
+ unsigned int failures = 0;
+ if (CU_initialize_registry() != CUE_SUCCESS)
+ return CU_get_error();
+
+ suite = CU_add_suite("gserialized_histogram_helpers", NULL, NULL);
+ if (!suite)
+ goto cleanup;
+
+ if (!CU_add_test(suite, "histogram budget clamps", histogram_budget_clamps) ||
+ !CU_add_test(suite, "nd_stats value index guards", nd_stats_indexing_behaviour) ||
+ !CU_add_test(suite, "nd_box ratio edge cases", nd_box_ratio_cases))
+ {
+ goto cleanup;
+ }
+
+ CU_basic_set_mode(CU_BRM_VERBOSE);
+ CU_basic_run_tests();
+
+cleanup:
+ failures = CU_get_number_of_tests_failed();
+ CU_cleanup_registry();
+ return failures == 0 ? CUE_SUCCESS : 1;
+}
diff --git a/postgis/gserialized_estimate.c b/postgis/gserialized_estimate.c
index 1e84228b0..ea887ecb6 100644
--- a/postgis/gserialized_estimate.c
+++ b/postgis/gserialized_estimate.c
@@ -19,11 +19,10 @@
**********************************************************************
*
* Copyright 2012 (C) Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
*
**********************************************************************/
-
-
/**********************************************************************
THEORY OF OPERATION
@@ -112,10 +111,12 @@ dimensionality cases. (2D geometry) &&& (3D column), etc.
#include "stringbuffer.h"
#include "liblwgeom.h"
#include "lwgeodetic.h"
-#include "lwgeom_pg.h" /* For debugging macros. */
+#include "lwgeom_pg.h" /* For debugging macros. */
#include "gserialized_gist.h" /* For index common functions */
+#include "gserialized_estimate_support.h"
#include <math.h>
+#include <limits.h>
#if HAVE_IEEEFP_H
#include <ieeefp.h>
#endif
@@ -144,8 +145,7 @@ Datum _postgis_gserialized_stats(PG_FUNCTION_ARGS);
/* Local prototypes */
static Oid table_get_spatial_index(Oid tbl_oid, int16 attnum, int *key_type, int16 *idx_attnum);
-static GBOX * spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
-
+static GBOX *spatial_index_read_extent(Oid idx_oid, int idx_att_num, int key_type);
/* Other prototypes */
float8 gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, int mode);
@@ -186,13 +186,6 @@ Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
*/
#define SDFACTOR 3.25
-/**
-* The maximum number of dimensions our code can handle.
-* We'll use this to statically allocate a bunch of
-* arrays below.
-*/
-#define ND_DIMS 4
-
/**
* Minimum width of a dimension that we'll bother trying to
* compute statistics on. Bearing in mind we have no control
@@ -219,68 +212,6 @@ Datum geometry_estimated_extent(PG_FUNCTION_ARGS);
#define FALLBACK_ND_SEL 0.2
#define FALLBACK_ND_JOINSEL 0.3
-/**
-* N-dimensional box type for calculations, to avoid doing
-* explicit axis conversions from GBOX in all calculations
-* at every step.
-*/
-typedef struct ND_BOX_T
-{
- float4 min[ND_DIMS];
- float4 max[ND_DIMS];
-} ND_BOX;
-
-/**
-* N-dimensional box index type
-*/
-typedef struct ND_IBOX_T
-{
- int min[ND_DIMS];
- int max[ND_DIMS];
-} ND_IBOX;
-
-
-/**
-* N-dimensional statistics structure. Well, actually
-* four-dimensional, but set up to handle arbitrary dimensions
-* if necessary (really, we just want to get the 2,3,4-d cases
-* into one shared piece of code).
-*/
-typedef struct ND_STATS_T
-{
- /* Dimensionality of the histogram. */
- float4 ndims;
-
- /* Size of n-d histogram in each dimension. */
- float4 size[ND_DIMS];
-
- /* Lower-left (min) and upper-right (max) spatial bounds of histogram. */
- ND_BOX extent;
-
- /* How many rows in the table itself? */
- float4 table_features;
-
- /* How many rows were in the sample that built this histogram? */
- float4 sample_features;
-
- /* How many not-Null/Empty features were in the sample? */
- float4 not_null_features;
-
- /* How many features actually got sampled in the histogram? */
- float4 histogram_features;
-
- /* How many cells in histogram? (sizex*sizey*sizez*sizem) */
- float4 histogram_cells;
-
- /* How many cells did those histogram features cover? */
- /* Since we are pro-rating coverage, this number should */
- /* now always equal histogram_features */
- float4 cells_covered;
-
- /* Variable length # of floats for histogram */
- float4 value[1];
-} ND_STATS;
-
typedef struct {
/* Saved state from std_typanalyze() */
AnalyzeAttrComputeStatsFunc std_compute_stats;
@@ -318,13 +249,12 @@ text_p_get_mode(const text *txt)
char *modestr;
if (VARSIZE_ANY_EXHDR(txt) <= 0)
return mode;
- modestr = (char*)VARDATA(txt);
- if ( modestr[0] == 'N' )
+ modestr = (char *)VARDATA(txt);
+ if (modestr[0] == 'N')
mode = 0;
return mode;
}
-
/**
* Integer comparison function for qsort
*/
@@ -372,7 +302,7 @@ total_double(const double *vals, int nvals)
int i;
float total = 0;
/* Calculate total */
- for ( i = 0; i < nvals; i++ )
+ for (i = 0; i < nvals; i++)
total += vals[i];
return total;
@@ -425,33 +355,6 @@ stddev(const int *vals, int nvals)
}
#endif /* POSTGIS_DEBUG_LEVEL >= 3 */
-/**
-* Given a position in the n-d histogram (i,j,k) return the
-* position in the 1-d values array.
-*/
-static int
-nd_stats_value_index(const ND_STATS *stats, int *indexes)
-{
- int d;
- int accum = 1, vdx = 0;
-
- /* Calculate the index into the 1-d values array that the (i,j,k,l) */
- /* n-d histogram coordinate implies. */
- /* index = x + y * sizex + z * sizex * sizey + m * sizex * sizey * sizez */
- for ( d = 0; d < (int)(stats->ndims); d++ )
- {
- int size = (int)(stats->size[d]);
- if ( indexes[d] < 0 || indexes[d] >= size )
- {
- POSTGIS_DEBUGF(3, " bad index at (%d, %d)", indexes[0], indexes[1]);
- return -1;
- }
- vdx += indexes[d] * accum;
- accum *= size;
- }
- return vdx;
-}
-
/**
* Convert an #ND_BOX to a JSON string for printing
*/
@@ -722,50 +625,6 @@ nd_box_overlap(const ND_STATS *nd_stats, const ND_BOX *nd_box, ND_IBOX *nd_ibox)
return true;
}
-/**
-* Returns the proportion of b2 that is covered by b1.
-*/
-static inline double
-nd_box_ratio(const ND_BOX *b1, const ND_BOX *b2, int ndims)
-{
- int d;
- bool covered = true;
- double ivol = 1.0;
- double vol2 = 1.0;
-
- for ( d = 0 ; d < ndims; d++ )
- {
- if ( b1->max[d] <= b2->min[d] || b1->min[d] >= b2->max[d] )
- return 0.0; /* Disjoint */
-
- if ( b1->min[d] > b2->min[d] || b1->max[d] < b2->max[d] )
- covered = false;
- }
-
- if ( covered )
- return 1.0;
-
- for ( d = 0; d < ndims; d++ )
- {
- double width2 = b2->max[d] - b2->min[d];
- double imin, imax, iwidth;
-
- vol2 *= width2;
-
- imin = Max(b1->min[d], b2->min[d]);
- imax = Min(b1->max[d], b2->max[d]);
- iwidth = imax - imin;
- iwidth = Max(0.0, iwidth);
-
- ivol *= iwidth;
- }
-
- if ( vol2 == 0.0 )
- return vol2;
-
- return ivol / vol2;
-}
-
/* How many bins shall we use in figuring out the distribution? */
#define MAX_NUM_BINS 50
#define BIN_MIN_SIZE 10
@@ -894,9 +753,9 @@ nd_increment(ND_IBOX *ibox, int ndims, int *counter)
{
int d = 0;
- while ( d < ndims )
+ while (d < ndims)
{
- if ( counter[d] < ibox->max[d] )
+ if (counter[d] < ibox->max[d])
{
counter[d] += 1;
break;
@@ -905,7 +764,7 @@ nd_increment(ND_IBOX *ibox, int ndims, int *counter)
d++;
}
/* That's it, cannot increment any more! */
- if ( d == ndims )
+ if (d == ndims)
return false;
/* Increment complete! */
@@ -1321,9 +1180,9 @@ gserialized_joinsel_internal(PlannerInfo *root, List *args, JoinType jointype, i
PG_FUNCTION_INFO_V1(gserialized_gist_joinsel);
Datum gserialized_gist_joinsel(PG_FUNCTION_ARGS)
{
- PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ PlannerInfo *root = (PlannerInfo *)PG_GETARG_POINTER(0);
/* Oid operator = PG_GETARG_OID(1); */
- List *args = (List *) PG_GETARG_POINTER(2);
+ List *args = (List *)PG_GETARG_POINTER(2);
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
int mode = PG_GETARG_INT32(4);
@@ -1512,22 +1371,13 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
#endif
}
- /*
- * We'll build a histogram having stats->attr->attstattarget
- * (default 100) cells on each side, within reason...
- * we'll use ndims*100000 as the maximum number of cells.
- * Also, if we're sampling a relatively small table, we'll try to ensure that
- * we have a smaller grid.
- */
#if POSTGIS_PGSQL_VERSION >= 170
- histo_cells_target = (int)pow((double)(stats->attstattarget), (double)ndims);
POSTGIS_DEBUGF(3, " stats->attstattarget: %d", stats->attstattarget);
+ histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attstattarget);
#else
- histo_cells_target = (int)pow((double)(stats->attr->attstattarget), (double)ndims);
POSTGIS_DEBUGF(3, " stats->attr->attstattarget: %d", stats->attr->attstattarget);
+ histo_cells_target = histogram_cell_budget(total_rows, ndims, stats->attr->attstattarget);
#endif
- histo_cells_target = Min(histo_cells_target, ndims * 100000);
- histo_cells_target = Min(histo_cells_target, (int)(10 * ndims * total_rows));
POSTGIS_DEBUGF(3, " target # of histogram cells: %d", histo_cells_target);
/* If there's no useful features, we can't work out stats */
@@ -1836,8 +1686,6 @@ compute_gserialized_stats_mode(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfu
return;
}
-
-
/**
* In order to do useful selectivity calculations in both 2-D and N-D
* modes, we actually have to generate two stats objects, one for 2-D
@@ -1875,7 +1723,6 @@ compute_gserialized_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
}
}
-
/**
* This function will be called when the ANALYZE command is run
* on a column of the "geometry" or "geography" type.
diff --git a/postgis/gserialized_estimate_support.h b/postgis/gserialized_estimate_support.h
new file mode 100644
index 000000000..0d3a23d75
--- /dev/null
+++ b/postgis/gserialized_estimate_support.h
@@ -0,0 +1,197 @@
+/**********************************************************************
+ *
+ * PostGIS - Spatial Types for PostgreSQL
+ * http://postgis.net
+ *
+ * This file is part of PostGIS
+ *
+ * PostGIS is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * PostGIS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PostGIS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ **********************************************************************
+ *
+ * Internal helpers shared between the gserialized selectivity
+ * implementation and the unit tests.
+ *
+ * Keeping the routines header-only ensures the planner code and the
+ * harness evaluate the exact same floating-point flows without the
+ * cross-object plumbing that previously complicated maintenance.
+ * Nothing here is installed; the header is meant for
+ * gserialized_estimate.c and for the dedicated CUnit suite only.
+ *
+ **********************************************************************
+ *
+ * Copyright 2012 (C) Paul Ramsey <pramsey at cleverelephant.ca>
+ * Copyright 2025 (C) Darafei Praliaskouski <me at komzpa.net>
+ *
+ **********************************************************************/
+
+#ifndef POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
+#define POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+/* The maximum number of dimensions our statistics code supports. */
+#define ND_DIMS 4
+
+/* Lightweight n-dimensional box representation for selectivity math. */
+typedef struct ND_BOX_T {
+ float4 min[ND_DIMS];
+ float4 max[ND_DIMS];
+} ND_BOX;
+
+/* Integer counterpart used for histogram cell iteration. */
+typedef struct ND_IBOX_T {
+ int min[ND_DIMS];
+ int max[ND_DIMS];
+} ND_IBOX;
+
+/* On-disk representation of the histogram emitted by ANALYZE. */
+typedef struct ND_STATS_T {
+ float4 ndims;
+ float4 size[ND_DIMS];
+ ND_BOX extent;
+ float4 table_features;
+ float4 sample_features;
+ float4 not_null_features;
+ float4 histogram_features;
+ float4 histogram_cells;
+ float4 cells_covered;
+ float4 value[1];
+} ND_STATS;
+
+/*
+ * Return the flattened index for the histogram coordinate expressed by
+ * 'indexes'. A negative result signals that one of the axes fell outside
+ * the histogram definition.
+ */
+static inline int
+nd_stats_value_index(const ND_STATS *stats, const int *indexes)
+{
+ int d;
+ int accum = 1;
+ int vdx = 0;
+
+ for (d = 0; d < (int)(stats->ndims); d++)
+ {
+ int size = (int)(stats->size[d]);
+ if (indexes[d] < 0 || indexes[d] >= size)
+ return -1;
+ vdx += indexes[d] * accum;
+ accum *= size;
+ }
+ return vdx;
+}
+
+/*
+ * Derive the histogram grid budget requested by PostgreSQL's ANALYZE machinery.
+ * The planner caps the cell count via three heuristics that take the requested
+ * attstattarget, the histogram dimensionality, and the underlying row count
+ * into account. Double precision arithmetic keeps the intermediate products in
+ * range so the cap behaves consistently across build architectures.
+ */
+static inline int
+histogram_cell_budget(double total_rows, int ndims, int attstattarget)
+{
+ double budget;
+ double dims_cap;
+ double rows_cap;
+ double attstat;
+ double dims;
+
+ if (ndims <= 0)
+ return 0;
+
+ if (attstattarget <= 0)
+ attstattarget = 1;
+
+ /* Requested resolution coming from PostgreSQL's ANALYZE knob. */
+ attstat = (double)attstattarget;
+ dims = (double)ndims;
+ budget = pow(attstat, dims);
+
+ /* Hard ceiling that keeps the statistics collector responsive. */
+ dims_cap = (double)ndims * 100000.0;
+ if (budget > dims_cap)
+ budget = dims_cap;
+
+ /* Small relations do not need a histogram that dwarfs the sample. */
+ if (total_rows <= 0.0)
+ return 0;
+
+ rows_cap = 10.0 * (double)ndims * total_rows;
+ if (rows_cap < 0.0)
+ rows_cap = 0.0;
+
+ /* Keep intermediate computations in double precision before clamping. */
+ if (rows_cap > (double)INT_MAX)
+ rows_cap = (double)INT_MAX;
+
+ if (budget > rows_cap)
+ budget = rows_cap;
+
+ if (budget >= (double)INT_MAX)
+ return INT_MAX;
+ if (budget <= 0.0)
+ return 0;
+
+ return (int)budget;
+}
+
+/*
+ * Compute the portion of 'target' covered by 'cover'. The caller supplies the
+ * dimensionality because ND_BOX always carries four slots. Degenerate volumes
+ * fold to zero, allowing the callers to detect slabs that ANALYZE sometimes
+ * emits for skewed datasets.
+ */
+static inline double
+nd_box_ratio(const ND_BOX *cover, const ND_BOX *target, int ndims)
+{
+ int d;
+ bool fully_covered = true;
+ double ivol = 1.0;
+ double refvol = 1.0;
+
+ for (d = 0; d < ndims; d++)
+ {
+ if (cover->max[d] <= target->min[d] || cover->min[d] >= target->max[d])
+ return 0.0; /* Disjoint */
+
+ if (cover->min[d] > target->min[d] || cover->max[d] < target->max[d])
+ fully_covered = false;
+ }
+
+ if (fully_covered)
+ return 1.0;
+
+ for (d = 0; d < ndims; d++)
+ {
+ double width = target->max[d] - target->min[d];
+ double imin = Max(cover->min[d], target->min[d]);
+ double imax = Min(cover->max[d], target->max[d]);
+ double iwidth = Max(0.0, imax - imin);
+
+ refvol *= width;
+ ivol *= iwidth;
+ }
+
+ if (refvol == 0.0)
+ return refvol;
+
+ return ivol / refvol;
+}
+
+#endif /* POSTGIS_GSERIALIZED_ESTIMATE_SUPPORT_H */
-----------------------------------------------------------------------
Summary of changes:
NEWS | 1 +
configure.ac | 1 +
postgis/cunit/Makefile.in | 43 ++++++
postgis/cunit/cu_tester.c | 173 ++++++++++++++++++++++++
postgis/gserialized_estimate.c | 192 +++-----------------------
postgis/gserialized_estimate_support.h | 237 +++++++++++++++++++++++++++++++++
6 files changed, 474 insertions(+), 173 deletions(-)
create mode 100644 postgis/cunit/Makefile.in
create mode 100644 postgis/cunit/cu_tester.c
create mode 100644 postgis/gserialized_estimate_support.h
hooks/post-receive
--
PostGIS
More information about the postgis-tickets
mailing list