[postgis-tickets] r16783 - Avoid array overflow in ANALYZE (References #2985)

Paul Ramsey pramsey at cleverelephant.ca
Thu Sep 13 12:25:02 PDT 2018


Author: pramsey
Date: 2018-09-13 12:25:01 -0700 (Thu, 13 Sep 2018)
New Revision: 16783

Modified:
   branches/2.4/postgis/gserialized_estimate.c
Log:
Avoid array overflow in ANALYZE (References #2985)


Modified: branches/2.4/postgis/gserialized_estimate.c
===================================================================
--- branches/2.4/postgis/gserialized_estimate.c	2018-09-13 19:24:48 UTC (rev 16782)
+++ branches/2.4/postgis/gserialized_estimate.c	2018-09-13 19:25:01 UTC (rev 16783)
@@ -694,6 +694,8 @@
 	return ivol / vol2;
 }
 
+/* How many bins shall we use in figuring out the distribution? */
+#define NUM_BINS 50
 
 /**
 * Calculate how much a set of boxes is homogenously distributed
@@ -713,10 +715,8 @@
 static int
 nd_box_array_distribution(const ND_BOX **nd_boxes, int num_boxes, const ND_BOX *extent, int ndims, double *distribution)
 {
-	/* How many bins shall we use in figuring out the distribution? */
-	static int num_bins = 50;
 	int d, i, k, range;
-	int counts[num_bins];
+	int counts[NUM_BINS];
 	double smin, smax;   /* Spatial min, spatial max */
 	double swidth;       /* Spatial width of dimension */
 #if POSTGIS_DEBUG_LEVEL >= 3
@@ -729,7 +729,7 @@
 	for ( d = 0; d < ndims; d++ )
 	{
 		/* Initialize counts for this dimension */
-		memset(counts, 0, sizeof(int)*num_bins);
+		memset(counts, 0, sizeof(counts));
 
 		smin = extent->min[d];
 		smax = extent->max[d];
@@ -755,7 +755,7 @@
 			minoffset = ndb->min[d] - smin;
 			maxoffset = ndb->max[d] - smin;
 
-			/* Skip boxes that our outside our working range */
+			/* Skip boxes that are outside our working range */
 			if ( minoffset < 0 || minoffset > swidth ||
 			     maxoffset < 0 || maxoffset > swidth )
 			{
@@ -763,9 +763,13 @@
 			}
 
 			/* What bins does this range correspond to? */
-			bmin = num_bins * (minoffset) / swidth;
-			bmax = num_bins * (maxoffset) / swidth;
+			bmin = floor(NUM_BINS * minoffset / swidth);
+			bmax = floor(NUM_BINS * maxoffset / swidth);
 
+			/* Should only happen when maxoffset==swidth */
+			if (bmax >= NUM_BINS)
+				bmax = NUM_BINS-1;
+
 			POSTGIS_DEBUGF(4, " dimension %d, feature %d: bin %d to bin %d", d, i, bmin, bmax);
 
 			/* Increment the counts in all the bins this feature overlaps */
@@ -777,11 +781,11 @@
 		}
 
 		/* How dispersed is the distribution of features across bins? */
-		range = range_quintile(counts, num_bins);
+		range = range_quintile(counts, NUM_BINS);
 
 #if POSTGIS_DEBUG_LEVEL >= 3
-		average = avg(counts, num_bins);
-		sdev = stddev(counts, num_bins);
+		average = avg(counts, NUM_BINS);
+		sdev = stddev(counts, NUM_BINS);
 		sdev_ratio = sdev/average;
 
 		POSTGIS_DEBUGF(3, " dimension %d: range = %d", d, range);



More information about the postgis-tickets mailing list