[postgis-tickets] [SCM] PostGIS branch master updated. 3.2.0-353-ge180a30fa

git at osgeo.org git at osgeo.org
Sun Jan 23 02:46:07 PST 2022


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "PostGIS".

The branch, master has been updated
       via  e180a30fa28003b295054b3bbf2e0f559cca257d (commit)
       via  f9b69fe4e9bc89135fbc2367f20c45bdcc54e93d (commit)
       via  337a30f85841bec1a973021939bc1d42bcd298c0 (commit)
       via  af1ab301d3e05f05582888a6a3b8392dd77bb7b6 (commit)
       via  df638fe844466e5a829d961de5eb1a93c0eb8a78 (commit)
       via  89600849aa3aa74273dba51047b7393f5f3dbbcb (commit)
       via  22e0b8d5c1e7302cb5d19e7fb036def4c546c4b7 (commit)
      from  6a48844bd75afa4a773a22dd20bb507023fdab88 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit e180a30fa28003b295054b3bbf2e0f559cca257d
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sun Jan 23 13:45:42 2022 +0300

    Update NEWS about GiST merged changes

diff --git a/NEWS b/NEWS
index 27828bb65..ce7d8859a 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ PostGIS 3.3.0dev
  * Enhancements *
   - #5037, add convexhull 3D (Loïc Bartoletti)
   - #5040, add postgis_sfcgal_full_version (Loïc Bartoletti)
+  - GH655, GiST: balance the tree splits better in recursive calls (Darafei Praliaskouski)
+  - GH657, GiST: do not call no-op decompress function (Aliaksandr Kalenik)
 
 PostGIS 3.2.0 (Olivier Courtin Edition)
 2021/12/17

commit f9b69fe4e9bc89135fbc2367f20c45bdcc54e93d
Merge: af1ab301d 337a30f85
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sun Jan 23 13:43:08 2022 +0300

    Enable recursion-aware GiST picksplit.
    
    Merge remote-tracking branch 'gh/pr/655/merge'


commit 337a30f85841bec1a973021939bc1d42bcd298c0
Merge: 6a48844bd 89600849a
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sat Jan 22 10:31:04 2022 +0300

    Merge 89600849aa3aa74273dba51047b7393f5f3dbbcb into 6a48844bd75afa4a773a22dd20bb507023fdab88


commit af1ab301d3e05f05582888a6a3b8392dd77bb7b6
Merge: 6a48844bd df638fe84
Author: kalenikaliaksandr <kalenik.aliaksandr at gmail.com>
Date:   Sat Jan 22 10:15:21 2022 +0300

    Merge df638fe844466e5a829d961de5eb1a93c0eb8a78 into 6a48844bd75afa4a773a22dd20bb507023fdab88


commit df638fe844466e5a829d961de5eb1a93c0eb8a78
Author: Aliaksandr Kalenik <kalenik.aliaksandr at gmail.com>
Date:   Sat Jan 22 05:29:22 2022 +0300

    remove gist decompress support function

diff --git a/postgis/geography.sql.in b/postgis/geography.sql.in
index 5b4e904ab..4fa9aaa11 100644
--- a/postgis/geography.sql.in
+++ b/postgis/geography.sql.in
@@ -296,7 +296,9 @@ CREATE OPERATOR CLASS gist_geography_ops
 	FUNCTION        1        geography_gist_consistent (internal, geography, int4),
 	FUNCTION        2        geography_gist_union (bytea, internal),
 	FUNCTION        3        geography_gist_compress (internal),
+#if POSTGIS_PGSQL_VERSION < 110
 	FUNCTION        4        geography_gist_decompress (internal),
+#endif
 	FUNCTION        5        geography_gist_penalty (internal, internal, internal),
 	FUNCTION        6        geography_gist_picksplit (internal, internal),
 	FUNCTION        7        geography_gist_same (box2d, box2d, internal);
diff --git a/postgis/postgis.sql.in b/postgis/postgis.sql.in
index 04f11babe..f3d3c4861 100644
--- a/postgis/postgis.sql.in
+++ b/postgis/postgis.sql.in
@@ -823,7 +823,9 @@ CREATE OPERATOR CLASS gist_geometry_ops_2d
 	FUNCTION        1        geometry_gist_consistent_2d (internal, geometry, int4),
 	FUNCTION        2        geometry_gist_union_2d (bytea, internal),
 	FUNCTION        3        geometry_gist_compress_2d (internal),
+#if POSTGIS_PGSQL_VERSION < 110
 	FUNCTION        4        geometry_gist_decompress_2d (internal),
+#endif
 	FUNCTION        5        geometry_gist_penalty_2d (internal, internal, internal),
 	FUNCTION        6        geometry_gist_picksplit_2d (internal, internal),
 	FUNCTION        7        geometry_gist_same_2d (geom1 geometry, geom2 geometry, internal);
@@ -1008,7 +1010,9 @@ CREATE OPERATOR CLASS gist_geometry_ops_nd
 	FUNCTION        1        geometry_gist_consistent_nd (internal, geometry, int4),
 	FUNCTION        2        geometry_gist_union_nd (bytea, internal),
 	FUNCTION        3        geometry_gist_compress_nd (internal),
+#if POSTGIS_PGSQL_VERSION < 110
 	FUNCTION        4        geometry_gist_decompress_nd (internal),
+#endif
 	FUNCTION        5        geometry_gist_penalty_nd (internal, internal, internal),
 	FUNCTION        6        geometry_gist_picksplit_nd (internal, internal),
 	FUNCTION        7        geometry_gist_same_nd (geometry, geometry, internal);

commit 89600849aa3aa74273dba51047b7393f5f3dbbcb
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sun Jan 16 19:35:39 2022 +0300

    Picksplit: break last tie to put the tuple into smaller page, not to the right
    
    Overlap is reduced: if a tuple is a perfect fit for some side, put it there.

diff --git a/postgis/gserialized_gist_2d.c b/postgis/gserialized_gist_2d.c
index da882de83..a47c58d3e 100644
--- a/postgis/gserialized_gist_2d.c
+++ b/postgis/gserialized_gist_2d.c
@@ -2039,27 +2039,12 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
 	 */
 	if (commonEntriesCount > 0)
 	{
-		/*
-		 * Calculate minimum number of entries that must be placed in both
-		 * groups, to reach LIMIT_RATIO.
-		 */
-		int			m = ceil(LIMIT_RATIO * (double) nentries);
+		/* Calculate minimum number of entries that must be placed in both groups, to reach LIMIT_RATIO. */
+		int m = ceil(LIMIT_RATIO * (double)nentries);
 
-		/* Recursive picksplit called, splitting full page at least */
-		if (nentries > INDEX_TUPLES_PER_PAGE)
-		{
-			if (nentries <= 2 * INDEX_TUPLES_PER_PAGE)
-			{
-				/* If this is going to be the last split, make sure we split into 2 parts */
-				m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
-			}
-			else
-			{
-				/* Make sure we'll be filling the smaller side until the end of the page at least */
-				m = ((Max(Min(v->spl_nleft, v->spl_nright), m) / INDEX_TUPLES_PER_PAGE) + 1) *
-				    INDEX_TUPLES_PER_PAGE;
-			}
-		}
+		/* Recursive picksplit called, this is going to be the last split, keep split into 2 parts */
+		if (nentries > INDEX_TUPLES_PER_PAGE && nentries <= 2 * INDEX_TUPLES_PER_PAGE)
+			m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
 
 		/*
 		 * Calculate delta between penalties of join "common entries" to
@@ -2083,25 +2068,39 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
 		 */
 		for (i = 0; i < commonEntriesCount; i++)
 		{
-			box = (BOX2DF *) DatumGetPointer(entryvec->vector[
-												commonEntries[i].index].key);
-
-			/*
-			 * Check if we have to place this entry in either group to achieve
-			 * LIMIT_RATIO.
-			 */
-			if (v->spl_nleft + (commonEntriesCount - i) <= m)
-				PLACE_LEFT(box, commonEntries[i].index);
+			float right_penalty, left_penalty;
+			bool place_right = true;
+			box = (BOX2DF *)DatumGetPointer(entryvec->vector[commonEntries[i].index].key);
+
+			/* Recheck penalties. After we put undecided tuples to some side they're changed */
+			left_penalty = box2df_penalty(leftBox, box);
+			right_penalty = box2df_penalty(rightBox, box);
+
+			/* Check if penalty is absolutely telling a tuple should go to some side */
+			if (right_penalty > 0 && left_penalty == 0)
+				place_right = false;
+			else if (right_penalty == 0 && left_penalty > 0)
+				place_right = true;
+			/* Check if we have to place this entry in either group to achieve LIMIT_RATIO */
+			else if (v->spl_nleft + (commonEntriesCount - i) <= m)
+				place_right = false;
 			else if (v->spl_nright + (commonEntriesCount - i) <= m)
+				place_right = true;
+			/* Otherwise select the group by smaller penalty */
+			else if (left_penalty < right_penalty)
+				place_right = false;
+			else if (right_penalty < left_penalty)
+				place_right = true;
+			/* or just put tuple into smaller group */
+			else if (v->spl_nleft < v->spl_nright)
+				place_right = false;
+			else
+				place_right = true;
+
+			if (place_right)
 				PLACE_RIGHT(box, commonEntries[i].index);
 			else
-			{
-				/* Otherwise select the group by minimal penalty */
-				if (box2df_penalty(leftBox, box) < box2df_penalty(rightBox, box))
-					PLACE_LEFT(box, commonEntries[i].index);
-				else
-					PLACE_RIGHT(box, commonEntries[i].index);
-			}
+				PLACE_LEFT(box, commonEntries[i].index);
 		}
 	}
 	v->spl_ldatum = PointerGetDatum(leftBox);

commit 22e0b8d5c1e7302cb5d19e7fb036def4c546c4b7
Author: Darafei Praliaskouski <me at komzpa.net>
Date:   Sun Jan 16 18:49:57 2022 +0300

    Recursion aware picksplit.

diff --git a/postgis/gserialized_gist_2d.c b/postgis/gserialized_gist_2d.c
index f9207ec98..da882de83 100644
--- a/postgis/gserialized_gist_2d.c
+++ b/postgis/gserialized_gist_2d.c
@@ -19,7 +19,7 @@
  **********************************************************************
  *
  * Copyright 2009 Paul Ramsey <pramsey at cleverelephant.ca>
- * Copyright 2017-2019 Darafei Praliaskouski <me at komzpa.net>
+ * Copyright 2017-2022 Darafei Praliaskouski <me at komzpa.net>
  *
  **********************************************************************/
 
@@ -54,11 +54,11 @@
 #include <float.h> /* For FLT_MAX */
 #include <math.h>
 
-/*
-** When is a node split not so good? If more than 90% of the entries
-** end up in one of the children.
-*/
-#define LIMIT_RATIO 0.1
+/* When is a node split not so good? Prefer to not split 2 pages into more than 3 pages in index. */
+#define LIMIT_RATIO 0.3333333333333333
+
+/* How many index tuples does one page fit? Recursive splits will target this. */
+#define INDEX_TUPLES_PER_PAGE 291
 
 /*
 ** For debugging
@@ -2045,6 +2045,22 @@ Datum gserialized_gist_picksplit_2d(PG_FUNCTION_ARGS)
 		 */
 		int			m = ceil(LIMIT_RATIO * (double) nentries);
 
+		/* Recursive picksplit called, splitting full page at least */
+		if (nentries > INDEX_TUPLES_PER_PAGE)
+		{
+			if (nentries <= 2 * INDEX_TUPLES_PER_PAGE)
+			{
+				/* If this is going to be the last split, make sure we split into 2 parts */
+				m = Max(m, nentries - INDEX_TUPLES_PER_PAGE);
+			}
+			else
+			{
+				/* Make sure we'll be filling the smaller side until the end of the page at least */
+				m = ((Max(Min(v->spl_nleft, v->spl_nright), m) / INDEX_TUPLES_PER_PAGE) + 1) *
+				    INDEX_TUPLES_PER_PAGE;
+			}
+		}
+
 		/*
 		 * Calculate delta between penalties of join "common entries" to
 		 * different groups.

-----------------------------------------------------------------------

Summary of changes:
 NEWS                          |  2 ++
 postgis/geography.sql.in      |  2 ++
 postgis/gserialized_gist_2d.c | 69 ++++++++++++++++++++++++++-----------------
 postgis/postgis.sql.in        |  4 +++
 4 files changed, 50 insertions(+), 27 deletions(-)


hooks/post-receive
-- 
PostGIS


More information about the postgis-tickets mailing list