Skip to content

Commit

Permalink
Merge pull request #58 from hotosm/refactor/split-algorithm
Browse files Browse the repository at this point in the history
Merge least feature count polygons with neighbouring polygons
  • Loading branch information
spwoodcock authored Oct 30, 2024
2 parents aa0db4c + 2bbb72f commit b76e10c
Showing 1 changed file with 112 additions and 117 deletions.
229 changes: 112 additions & 117 deletions fmtm_splitter/fmtm_algorithm.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ DROP TABLE IF EXISTS polygonsnocount;
DO $$
DECLARE
lines_count INTEGER;
num_buildings INTEGER;
BEGIN
-- Check if ways_line has any data
SELECT COUNT(*) INTO lines_count
Expand Down Expand Up @@ -135,7 +134,6 @@ USING gist (geom);
-- Clean up the table which may have gaps and stuff from spatial indexing
-- VACUUM ANALYZE buildings;


DROP TABLE IF EXISTS splitpolygons;
CREATE TABLE splitpolygons AS (
WITH polygonsfeaturecount AS (
Expand Down Expand Up @@ -163,49 +161,49 @@ USING gist (geom);
DROP TABLE polygonsnocount;


DROP TABLE IF EXISTS lowfeaturecountpolygons;
CREATE TABLE lowfeaturecountpolygons AS (
-- Grab the polygons with fewer than the requisite number of features
WITH lowfeaturecountpolys AS (
SELECT *
FROM splitpolygons AS p
-- TODO: feature count should not be hard-coded
WHERE p.numfeatures < %(num_buildings)s
),

-- Find the neighbors of the low-feature-count polygons
-- Store their ids as n_polyid, numfeatures as n_numfeatures, etc
allneighborlist AS (
SELECT
p.*,
pf.polyid AS n_polyid,
pf.area AS n_area,
p.numfeatures AS n_numfeatures,
-- length of shared boundary to make nice merge decisions
ST_LENGTH2D(ST_INTERSECTION(p.geom, pf.geom)) AS sharedbound
FROM lowfeaturecountpolys AS p
INNER JOIN splitpolygons AS pf
-- Anything that touches
ON ST_TOUCHES(p.geom, pf.geom)
-- But eliminate those whose intersection is a point, because
-- polygons that only touch at a corner shouldn't be merged
AND ST_GEOMETRYTYPE(ST_INTERSECTION(p.geom, pf.geom)) != 'ST_Point'
-- Sort first by polyid of the low-feature-count polygons
-- Then by descending featurecount and area of the
-- high-feature-count neighbors (area is in case of equal
-- featurecounts, we'll just pick the biggest to add to)
ORDER BY p.polyid ASC, p.numfeatures DESC, pf.area DESC
-- OR, maybe for more aesthetic merges:
-- order by p.polyid, sharedbound desc
)

SELECT DISTINCT ON (a.polyid) * FROM allneighborlist AS a
);
ALTER TABLE lowfeaturecountpolygons ADD PRIMARY KEY (polyid);
SELECT POPULATE_GEOMETRY_COLUMNS('public.lowfeaturecountpolygons'::regclass);
CREATE INDEX lowfeaturecountpolygons_idx
ON lowfeaturecountpolygons
USING gist (geom);
-- DROP TABLE IF EXISTS lowfeaturecountpolygons;
-- CREATE TABLE lowfeaturecountpolygons AS (
-- -- Grab the polygons with fewer than the requisite number of features
-- WITH lowfeaturecountpolys AS (
-- SELECT *
-- FROM splitpolygons AS p
-- -- TODO: feature count should not be hard-coded
-- WHERE p.numfeatures < %(num_buildings)s
-- ),

-- -- Find the neighbors of the low-feature-count polygons
-- -- Store their ids as n_polyid, numfeatures as n_numfeatures, etc
-- allneighborlist AS (
-- SELECT
-- p.*,
-- pf.polyid AS n_polyid,
-- pf.area AS n_area,
-- p.numfeatures AS n_numfeatures,
-- -- length of shared boundary to make nice merge decisions
-- ST_LENGTH2D(ST_INTERSECTION(p.geom, pf.geom)) AS sharedbound
-- FROM lowfeaturecountpolys AS p
-- INNER JOIN splitpolygons AS pf
-- -- Anything that touches
-- ON ST_TOUCHES(p.geom, pf.geom)
-- -- But eliminate those whose intersection is a point, because
-- -- polygons that only touch at a corner shouldn't be merged
-- AND ST_GEOMETRYTYPE(ST_INTERSECTION(p.geom, pf.geom)) != 'ST_Point'
-- -- Sort first by polyid of the low-feature-count polygons
-- -- Then by descending featurecount and area of the
-- -- high-feature-count neighbors (area is in case of equal
-- -- featurecounts, we'll just pick the biggest to add to)
-- ORDER BY p.polyid ASC, p.numfeatures DESC, pf.area DESC
-- -- OR, maybe for more aesthetic merges:
-- -- order by p.polyid, sharedbound desc
-- )

-- SELECT DISTINCT ON (a.polyid) * FROM allneighborlist AS a
-- );
-- ALTER TABLE lowfeaturecountpolygons ADD PRIMARY KEY (polyid);
-- SELECT POPULATE_GEOMETRY_COLUMNS('public.lowfeaturecountpolygons'::regclass);
-- CREATE INDEX lowfeaturecountpolygons_idx
-- ON lowfeaturecountpolygons
-- USING gist (geom);
-- VACUUM ANALYZE lowfeaturecountpolygons;


Expand Down Expand Up @@ -245,7 +243,7 @@ CREATE TABLE clusteredbuildings AS (
*,
ST_CLUSTERKMEANS(
b.geom,
CAST((b.numfeatures / %(num_buildings)s) + 1 AS integer)
CAST((b.numfeatures / b.%(num_buildings)s) + 1 AS integer)
)
OVER (PARTITION BY b.polyid)
AS cid
Expand Down Expand Up @@ -335,7 +333,6 @@ USING gist (geom);

--VACUUM ANALYZE unsimplifiedtaskpolygons;


--*****************************Simplify*******************************
-- Extract unique line segments
DROP TABLE IF EXISTS taskpolygons;
Expand Down Expand Up @@ -380,78 +377,70 @@ CREATE TABLE taskpolygons AS (
FROM taskpolygonsnoindex AS tpni
);

-- ALTER TABLE taskpolygons ADD PRIMARY KEY(taskid);

-- Step 1: Identify polygons without any buildings
DROP TABLE IF EXISTS no_building_polygons;
CREATE TABLE no_building_polygons AS (
SELECT
tp.*,
tp.geom AS no_building_geom
FROM taskpolygons AS tp
LEFT JOIN buildings AS b ON ST_INTERSECTS(tp.geom, b.geom)
WHERE b.geom IS NULL
);

-- Step 2: Identify neighboring polygons
DROP TABLE IF EXISTS neighboring_polygons;
CREATE TABLE neighboring_polygons AS (
SELECT
nb.*,
nb.geom AS neighbor_geom,
nb_building_count,
nbp.no_building_geom
FROM taskpolygons AS nb
INNER JOIN no_building_polygons AS nbp
-- Finds polygons that touch each other
ON ST_TOUCHES(nbp.no_building_geom, nb.geom)
LEFT JOIN (
-- Step 3: Count buildings in the neighboring polygons
SELECT
nb.geom,
COUNT(b.geom) AS nb_building_count
FROM taskpolygons AS nb
LEFT JOIN buildings AS b ON ST_INTERSECTS(nb.geom, b.geom)
GROUP BY nb.geom
) AS building_counts
ON nb.geom = building_counts.geom
);
ALTER TABLE taskpolygons ADD PRIMARY KEY (taskid);
SELECT POPULATE_GEOMETRY_COLUMNS('public.taskpolygons'::regclass);
CREATE INDEX taskpolygons_idx
ON taskpolygons
USING gist (geom);

-- Step 4: Find the optimal neighboring polygon to avoid,
-- same polygons with the smallest number of buildings merging into
-- multiple neighboring polygons
DROP TABLE IF EXISTS optimal_neighbors;
CREATE TABLE optimal_neighbors AS (
SELECT
nbp.no_building_geom,
nbp.neighbor_geom
FROM neighboring_polygons AS nbp
WHERE nbp.nb_building_count = (
SELECT MIN(nb_building_count)
FROM neighboring_polygons AS np
WHERE np.no_building_geom = nbp.no_building_geom
)
);
-- Merge least feature polygons with neighbouring polygons
DO $$
DECLARE
num_buildings INTEGER := %(num_buildings)s;
min_area NUMERIC; -- Set the minimum area threshold
mean_area NUMERIC;
stddev_area NUMERIC; -- Set the standard deviation
min_buildings INTEGER; -- Set the minimum number of buildings threshold
small_polygon RECORD; -- set small_polygon and nearest_neighbor as record
nearest_neighbor RECORD; -- in order to use them in the loop
BEGIN
min_buildings := num_buildings * 0.5;

-- Step 5: Merge the small polygons with their optimal neighboring polygons
UPDATE taskpolygons tp
SET geom = ST_UNION(tp.geom, nbp.no_building_geom)
FROM optimal_neighbors AS nbp
WHERE tp.geom = nbp.neighbor_geom;
DELETE FROM taskpolygons
WHERE geom IN (
SELECT no_building_geom
FROM no_building_polygons
);
-- Find the mean and standard deviation of the area
SELECT
AVG(ST_Area(geom)),
STDDEV_POP(ST_Area(geom))
INTO mean_area, stddev_area
FROM taskpolygons;

-- Set the threshold as mean - standard deviation
min_area := mean_area - stddev_area;

DROP TABLE IF EXISTS no_building_polygons;
DROP TABLE IF EXISTS neighboring_polygons;
DROP TABLE IF EXISTS leastfeaturepolygons;
CREATE TABLE leastfeaturepolygons AS
SELECT taskid, geom
FROM taskpolygons
WHERE ST_Area(geom) < min_area OR (
SELECT COUNT(b.id) FROM buildings b
WHERE ST_Contains(taskpolygons.geom, b.geom)
) < min_buildings; -- find least feature polygons based on the area and feature

FOR small_polygon IN
SELECT * FROM leastfeaturepolygons
LOOP
-- Find the nearest neighbor to merge the small polygon with
FOR nearest_neighbor IN
SELECT taskid, geom, ST_LENGTH2D(ST_Intersection(small_polygon.geom, geom)) as shared_bound
FROM taskpolygons
WHERE taskid NOT IN (SELECT taskid FROM leastfeaturepolygons)
AND ST_Touches(small_polygon.geom, geom)
AND ST_GEOMETRYTYPE(ST_INTERSECTION(small_polygon.geom, geom)) != 'ST_Point'
ORDER BY shared_bound DESC -- Find neighbor polygon based on shared boundary distance
LIMIT 1
LOOP
-- Merge the small polygon into the neighboring polygon
UPDATE taskpolygons
SET geom = ST_Union(geom, small_polygon.geom)
WHERE taskid = nearest_neighbor.taskid;

DELETE FROM taskpolygons WHERE taskid = small_polygon.taskid;
-- Exit the neighboring polygon loop after one successful merge
EXIT;
END LOOP;
END LOOP;
END $$;

SELECT POPULATE_GEOMETRY_COLUMNS('public.taskpolygons'::regclass);
CREATE INDEX taskpolygons_idx
ON taskpolygons
USING gist (geom);
DROP TABLE IF EXISTS leastfeaturepolygons;
-- VACUUM ANALYZE taskpolygons;

-- Generate GeoJSON output
Expand All @@ -464,8 +453,14 @@ FROM (
SELECT
JSONB_BUILD_OBJECT(
'type', 'Feature',
'geometry', ST_ASGEOJSON(geom)::jsonb,
'properties', JSONB_BUILD_OBJECT()
'geometry', ST_ASGEOJSON(t.geom)::jsonb,
'properties', JSONB_BUILD_OBJECT(
'building_count', (
SELECT COUNT(b.id)
FROM buildings AS b
WHERE ST_CONTAINS(t.geom, b.geom)
)
)
) AS feature
FROM taskpolygons
FROM taskpolygons AS t
) AS features;

0 comments on commit b76e10c

Please sign in to comment.