# [postgis-users] Minimum Spanning tree from MultiLineString

Bruce Rindahl bruce.rindahl at gmail.com
Wed Jun 30 15:39:26 PDT 2021

```Warning - Long post

I have created a function that finds the minimum spanning tree from a
MultiLineString per the description at:
I have it working in two parts but I can't merge them together into one
function.

First the data.  I created a table wiki with just a geometry column.  Then
I grabbed the  SVG coordinates from the Wiki link from above, tweaked it
and got the following MultiLineString:

MULTILINESTRING((12.927 149.924,50.261 222.857),(12.927 149.924,122.094
70.04),(12.927 149.924,49.247 169.665),(49.26 169.665,178.334
165.312),(50.01 222.79,49.26 169.665),(64.208 197.562,173.254
269.374),(64.355 197.598,50.01 222.455),(64.355 197.598,178.499
165.263),(64.602 197.598,49.26 169.665),(122.093 70.04,49.26
169.665),(173.254 269.374,50.01 222.79),(173.254 269.374,232.314
286.445),(173.427 269.374,242.527 269.556),(178.427 165.263,242.527
269.556),(178.499 165.263,122.093 70.04),(178.499 165.263,173.254
269.374),(232.314 286.445,242.527 269.556),(232.314 286.437,285.906
259.373),(242.527 269.556,285.906 259.236),(285.906 259.373,122.026
70.04),(285.906 259.373,178.427 165.263))

I then inserted it into the table wiki.  Note the vertices do not perfectly
line up..

Step one - I need to create a table that defines the geometry,starting and
ending points of each line segment and the line length.  This is easy:

create table mst_tree as (
WITH t1 as (SELECT (ST_Dump(geom)).geom AS geom from wiki),
t2 as(select st_snaptogrid(geom,10) geom from t1)
select geom, st_startpoint(geom) as src, st_endpoint(geom) as dest,
st_length(geom) as weight from t2
)

Note the value of 10 in the st_snaptogrid - this assures all the close
nodes snap to each other and will need to be a parameter to the function.

Next is the function that finds the Minimum Spanning Tree.  I used the
Kruskal method - see:
https://www.geeksforgeeks.org/kruskals-minimum-spanning-tree-algorithm-greedy-algo-2/

The full function is:
---------------------------------------------------------------

-- FUNCTION: public.mst()

-- DROP FUNCTION public.mst();

CREATE OR REPLACE FUNCTION public.mst(
-- tree geometry,
-- tolerance float
)
RETURNS TABLE(mst_geom geometry, mst_weight double precision)
LANGUAGE 'plpgsql'
COST 100
VOLATILE PARALLEL UNSAFE
ROWS 1000

AS \$BODY\$
DECLARE
edge RECORD;
e integer;
i integer := 0 ;
BEGIN
EXECUTE 'drop table if exists mst_edges';
EXECUTE 'drop table if exists mst_visited';

-- I am stuck here
--EXECUTE 'drop table if exists mst_tree';
--EXECUTE 'create table mst_tree as (WITH t1 as (SELECT (ST_Dump(tree)) AS
geom ),
--t2 as(select st_snaptogrid(geom,tolerance) geom from t1)
--select geom, st_startpoint(geom) as src, st_endpoint(geom) as dest,
st_length(geom) as weight from t2)';

EXECUTE 'create table mst_visited as (select src as node, 1 as branch
from mst_tree limit 0)';
EXECUTE 'create table mst_edges as (select geom, weight, src, dest, 1
as mst from mst_tree limit 0)';

with nodes as (
select src as node from mst_edges
union all
select dest as node from mst_edges
),
d_nodes as (
select distinct node from nodes)
SELECT COUNT(node) from d_nodes into e;

for edge in execute 'SELECT geom, weight, src, dest FROM mst_tree ORDER by
weight asc'
LOOP
EXIT WHEN e = 1;

WITH next_nodes as (
select edge.src as node
union all
select edge.dest as node
), -- put nodes in a single column

new_nodes as (
select node from next_nodes where node not in (select node from mst_visited)
), -- list of nodes not yet visited

existing_nodes as (
select next_nodes.node, branch from next_nodes, mst_visited
where next_nodes.node = mst_visited.node
),  -- list of nodes already visited

cycle_test as (
select
case
when count(node) = 2 and (max(branch) = min(branch)) then 2 -- forms a
loop, do not add to MST
else 1 end as mst, -- not a loop, include edge in MST
max(branch) as max_branch, min(branch) as min_branch
from existing_nodes
),

v (nodes) as (
insert into mst_visited (node,branch)
(select node,
case
when (select count(node) from new_nodes) = 2 then e --next_branch.n_b  --
two new nodes, new branch
else (select branch from existing_nodes) -- add node to existing branch
end
as branch
from new_nodes--,next_branch
)
returning *
),

v_c(node) as (
update mst_visited set branch = d.min_branch
from (select * from cycle_test) d
where mst_visited.branch = d.max_branch
returning *
), -- if two branches, combine branches (no effect if on same branch)  This
is the critical step to loop detection

-- update edge table, set mst to 1 or 2
u (node) as (
insert into mst_edges (geom,weight,src,dest,mst)
(select edge.geom,edge.weight, edge.src, edge.dest, mst from cycle_test
where mst = 1)
returning *
)

select mst from cycle_test into i;

IF (i = 1) then
e = e - 1;
END IF;
END LOOP;

EXECUTE 'drop table mst_visited';
RETURN QUERY SELECT geom,weight from mst_edges;

END;
\$BODY\$;

ALTER FUNCTION public.mst()
OWNER TO postgres;
-----------------------------------------------

If you create the table mst_tree from the first part and then run:

SELECT * from mst()

you will get a table of each segment in the MST and the associated weight
(length) .  If you want to get the result in a MultiLine segment:

SELECT ST_Collect(ARRAY(SELECT mst_geom FROM mst6()));

Now I am stuck.  How do I pass the geometry and tolerance into the
function  If I uncomment the lines I get the error:

ERROR: column "geom" does not exist LINE 1: ...te table mst_tree as (WITH
t1 as (SELECT (ST_Dump(geom)).geo... QUERY: create table mst_tree as (WITH
t1 as (SELECT (ST_Dump(geom)).geom AS geom), t2 as(select
st_snaptogrid(geom,\$2) geom from t1) select geom, st_startpoint(geom) as
src, st_endpoint(geom) as dest, st_length(geom) as weight from t2)
Any help will be appreciated
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.osgeo.org/pipermail/postgis-users/attachments/20210630/ca82f829/attachment.html>
```