diff --git a/fmtm_splitter/db.py b/fmtm_splitter/db.py
index dfecb82..a6ee93c 100644
--- a/fmtm_splitter/db.py
+++ b/fmtm_splitter/db.py
@@ -17,12 +17,16 @@
"""DB models for temporary tables in splitBySQL."""
import logging
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Optional, Union
import psycopg2
+from geojson import FeatureCollection
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json, register_uuid
-from shapely.geometry import Polygon
+from shapely.geometry import Polygon, shape
+
+from fmtm_splitter.osm_extract import generate_osm_extract
+from fmtm_splitter.parsers import json_str_to_dict
log = logging.getLogger(__name__)
@@ -191,3 +195,78 @@ def insert_geom(
except Exception as e:
log.error(f"Error executing query: {e}")
conn.rollback() # Rollback transaction on error
+
+
+def insert_features_to_db(
+ features: List[Dict[str, Any]],
+ conn: psycopg2.extensions.connection,
+ extract_type: str,
+) -> None:
+ """Insert features into the database with optimized performance.
+
+ Args:
+ features: List of features containing geometry and properties.
+ conn: Database connection object.
+ extract_type: Type of data extraction ("custom", "lines", or "extracts").
+ """
+ ways_poly_batch = []
+ ways_line_batch = []
+
+ for feature in features:
+ geom = shape(feature["geometry"]).wkb_hex
+ properties = feature.get("properties", {})
+ tags = properties.get("tags", properties)
+
+ if tags:
+ tags = json_str_to_dict(tags).get("tags", json_str_to_dict(tags))
+
+ osm_id = properties.get("osm_id")
+
+ # Process based on extract_type
+ if extract_type == "custom":
+ ways_poly_batch.append({"geom": geom})
+ elif extract_type == "lines":
+ if any(key in tags for key in ["highway", "waterway", "railway"]):
+ ways_line_batch.append({"osm_id": osm_id, "geom": geom, "tags": tags})
+ elif extract_type == "extracts":
+ if tags.get("building") == "yes":
+ ways_poly_batch.append({"osm_id": osm_id, "geom": geom, "tags": tags})
+ elif any(key in tags for key in ["highway", "waterway", "railway"]):
+ ways_line_batch.append({"osm_id": osm_id, "geom": geom, "tags": tags})
+
+ # Perform batch inserts
+ if ways_poly_batch:
+ insert_geom(conn, "ways_poly", ways_poly_batch)
+ if ways_line_batch:
+ insert_geom(conn, "ways_line", ways_line_batch)
+
+
+def process_features_for_db(
+ aoi_featcol: FeatureCollection,
+ conn: psycopg2.extensions.connection,
+ custom_features: Optional[List[Dict]] = None,
+) -> None:
+ """Process custom features or OSM extracts and insert them into the database."""
+ if custom_features:
+ insert_features_to_db(custom_features["features"], conn, "custom")
+
+ extract_lines = generate_osm_extract(aoi_featcol, "lines")
+ if extract_lines:
+ insert_features_to_db(extract_lines["features"], conn, "lines")
+ else:
+ extract_buildings = generate_osm_extract(aoi_featcol, "extracts")
+ insert_features_to_db(extract_buildings["features"], conn, "extracts")
+
+
+def setup_lines_view(conn, geometry):
+ """Create a database view for line features intersecting the AOI."""
+ view_sql = """
+ DROP VIEW IF EXISTS lines_view;
+ CREATE VIEW lines_view AS
+ SELECT tags, geom
+ FROM ways_line
+ WHERE ST_Intersects(ST_SetSRID(CAST(%s AS GEOMETRY), 4326), geom)
+ """
+ with conn.cursor() as cur:
+ cur.execute(view_sql, (geometry,))
+ cur.close()
diff --git a/fmtm_splitter/osm_extract.py b/fmtm_splitter/osm_extract.py
new file mode 100644
index 0000000..c6cf6fd
--- /dev/null
+++ b/fmtm_splitter/osm_extract.py
@@ -0,0 +1,75 @@
+#!/bin/python3
+
+# Copyright (c) 2022 Humanitarian OpenStreetMap Team
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# FMTM-Splitter is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with FMTM-Splitter. If not, see .
+#
+"""Class and helper methods for task splitting."""
+
+import logging
+from io import BytesIO
+
+from geojson import FeatureCollection
+from osm_rawdata.postgres import PostgresClient
+
+# Instantiate logger
+log = logging.getLogger(__name__)
+
+
+def generate_osm_extract(
+ aoi_featcol: FeatureCollection, extract_type: str
+) -> FeatureCollection:
+ """Generate OSM extract based on the specified type from AOI FeatureCollection."""
+ try:
+ config_data_map = {
+ "extracts": """
+ select:
+ from:
+ - nodes
+ - ways_poly
+ - ways_line
+ where:
+ tags:
+ - building: not null
+ - highway: not null
+ - waterway: not null
+ - railway: not null
+ - aeroway: not null
+ """,
+ "lines": """
+ select:
+ from:
+ - nodes
+ - ways_line
+ where:
+ tags:
+ - highway: not null
+ - waterway: not null
+ - railway: not null
+ - aeroway: not null
+ """,
+ }
+ config_data = config_data_map.get(extract_type)
+ if not config_data:
+ raise ValueError(f"Invalid extract type: {extract_type}")
+
+ config_bytes = BytesIO(config_data.encode())
+ pg = PostgresClient("underpass", config_bytes)
+ return pg.execQuery(
+ aoi_featcol,
+ extra_params={"fileName": "fmtm_splitter", "useStWithin": False},
+ )
+ except Exception as e:
+ log.error(f"Error during OSM extract generation: {e}")
+ raise RuntimeError(f"Failed to generate OSM extract: {e}") from e
diff --git a/fmtm_splitter/parsers.py b/fmtm_splitter/parsers.py
new file mode 100644
index 0000000..534f1e3
--- /dev/null
+++ b/fmtm_splitter/parsers.py
@@ -0,0 +1,91 @@
+#!/bin/python3
+
+# Copyright (c) 2022 Humanitarian OpenStreetMap Team
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# FMTM-Splitter is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with FMTM-Splitter. If not, see .
+#
+"""helper functions to parse data for task splitting."""
+
+import json
+import logging
+import math
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+# Instantiate logger
+log = logging.getLogger(__name__)
+
+
+def prepare_sql_query(sql_file: Optional[Union[str, Path]], default_path: Path) -> str:
+ """Load SQL query from a file or fallback to default."""
+ if not sql_file:
+ sql_file = default_path
+ with open(sql_file, "r") as sql:
+ return sql.read()
+
+
+def json_str_to_dict(json_item: Union[str, dict]) -> dict:
+ """Convert a JSON string to dict."""
+ if isinstance(json_item, dict):
+ return json_item
+ if isinstance(json_item, str):
+ try:
+ return json.loads(json_item)
+ except json.JSONDecodeError:
+ msg = f"Error decoding key in GeoJSON: {json_item}"
+ log.error(msg)
+ # Set tags to empty, skip feature
+ return {}
+
+
+def meters_to_degrees(meters: float, reference_lat: float) -> Tuple[float, float]:
+ """Converts meters to degrees at a given latitude.
+
+ Using WGS84 ellipsoidal calculations.
+
+ Args:
+ meters (float): The distance in meters to convert.
+ reference_lat (float): The latitude at which to ,
+ perform the conversion (in degrees).
+
+ Returns:
+ Tuple[float, float]: Degree values for latitude and longitude.
+ """
+ # INFO:
+ # The geodesic distance is the shortest distance on the surface
+ # of an ellipsoidal model of the earth
+
+ lat_rad = math.radians(reference_lat)
+
+ # Using WGS84 parameters
+ a = 6378137.0 # Semi-major axis in meters
+ f = 1 / 298.257223563 # Flattening factor
+
+ # Applying formula
+ e2 = (2 * f) - (f**2) # Eccentricity squared
+ n = a / math.sqrt(
+ 1 - e2 * math.sin(lat_rad) ** 2
+ ) # Radius of curvature in the prime vertical
+ m = (
+ a * (1 - e2) / (1 - e2 * math.sin(lat_rad) ** 2) ** (3 / 2)
+ ) # Radius of curvature in the meridian
+
+ lat_deg_change = meters / m # Latitude change in degrees
+ lon_deg_change = meters / (n * math.cos(lat_rad)) # Longitude change in degrees
+
+ # Convert changes to degrees by dividing by radians to degrees
+ lat_deg_change = math.degrees(lat_deg_change)
+ lon_deg_change = math.degrees(lon_deg_change)
+
+ return lat_deg_change, lon_deg_change
diff --git a/fmtm_splitter/splitter.py b/fmtm_splitter/splitter.py
index 054aaf8..70f85d9 100755
--- a/fmtm_splitter/splitter.py
+++ b/fmtm_splitter/splitter.py
@@ -20,17 +20,18 @@
import argparse
import json
import logging
-import math
import sys
-from io import BytesIO
from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Optional, Union
import geojson
import numpy as np
import psycopg2
+from app.fmtm_splitter.parsers import (
+ meters_to_degrees,
+ prepare_sql_query,
+)
from geojson import Feature, FeatureCollection, GeoJSON
-from osm_rawdata.postgres import PostgresClient
from psycopg2.extensions import connection
from shapely.geometry import Polygon, box, shape
from shapely.ops import unary_union
@@ -41,7 +42,8 @@
create_connection,
create_tables,
drop_tables,
- insert_geom,
+ process_features_for_db,
+ setup_lines_view,
)
# Instantiate logger
@@ -151,49 +153,6 @@ def geojson_to_shapely_polygon(
return shape(features[0].get("geometry"))
- def meters_to_degrees(
- self, meters: float, reference_lat: float
- ) -> Tuple[float, float]:
- """Converts meters to degrees at a given latitude.
-
- Using WGS84 ellipsoidal calculations.
-
- Args:
- meters (float): The distance in meters to convert.
- reference_lat (float): The latitude at which to ,
- perform the conversion (in degrees).
-
- Returns:
- Tuple[float, float]: Degree values for latitude and longitude.
- """
- # INFO:
- # The geodesic distance is the shortest distance on the surface
- # of an ellipsoidal model of the earth
-
- lat_rad = math.radians(reference_lat)
-
- # Using WGS84 parameters
- a = 6378137.0 # Semi-major axis in meters
- f = 1 / 298.257223563 # Flattening factor
-
- # Applying formula
- e2 = (2 * f) - (f**2) # Eccentricity squared
- n = a / math.sqrt(
- 1 - e2 * math.sin(lat_rad) ** 2
- ) # Radius of curvature in the prime vertical
- m = (
- a * (1 - e2) / (1 - e2 * math.sin(lat_rad) ** 2) ** (3 / 2)
- ) # Radius of curvature in the meridian
-
- lat_deg_change = meters / m # Latitude change in degrees
- lon_deg_change = meters / (n * math.cos(lat_rad)) # Longitude change in degrees
-
- # Convert changes to degrees by dividing by radians to degrees
- lat_deg_change = math.degrees(lat_deg_change)
- lon_deg_change = math.degrees(lon_deg_change)
-
- return lat_deg_change, lon_deg_change
-
def splitBySquare( # noqa: N802
self,
meters: int,
@@ -220,7 +179,7 @@ def splitBySquare( # noqa: N802
xmin, ymin, xmax, ymax = self.aoi.bounds
reference_lat = (ymin + ymax) / 2
- length_deg, width_deg = self.meters_to_degrees(meters, reference_lat)
+ length_deg, width_deg = meters_to_degrees(meters, reference_lat)
# Create grid columns and rows based on the AOI bounds
cols = np.arange(xmin, xmax + width_deg, width_deg)
@@ -507,125 +466,6 @@ def split_by_square(
return split_features
-def parse_geojson_input(input_data: Union[str, FeatureCollection]) -> FeatureCollection:
- """Parse input data into a valid FeatureCollection."""
- return FMTMSplitter.input_to_geojson(input_data)
-
-
-def prepare_sql_query(sql_file: Optional[Union[str, Path]], default_path: Path) -> str:
- """Load SQL query from a file or fallback to default."""
- if not sql_file:
- sql_file = default_path
- with open(sql_file, "r") as sql:
- return sql.read()
-
-
-def generate_osm_extract(
- aoi_featcol: FeatureCollection, extract_type: str
-) -> FeatureCollection:
- """Generate OSM extract based on the specified type from AOI FeatureCollection."""
- try:
- config_data_map = {
- "extracts": """
- select:
- from:
- - nodes
- - ways_poly
- - ways_line
- where:
- tags:
- - building: not null
- - highway: not null
- - waterway: not null
- - railway: not null
- - aeroway: not null
- """,
- "lines": """
- select:
- from:
- - nodes
- - ways_line
- where:
- tags:
- - highway: not null
- - waterway: not null
- - railway: not null
- - aeroway: not null
- """,
- }
- config_data = config_data_map.get(extract_type)
- if not config_data:
- raise ValueError(f"Invalid extract type: {extract_type}")
-
- config_bytes = BytesIO(config_data.encode())
- pg = PostgresClient("underpass", config_bytes)
- return pg.execQuery(
- aoi_featcol,
- extra_params={"fileName": "fmtm_splitter", "useStWithin": False},
- )
- except Exception as e:
- log.error(f"Error during OSM extract generation: {e}")
- raise RuntimeError(f"Failed to generate OSM extract: {e}") from e
-
-
-def json_str_to_dict(json_item: Union[str, dict]) -> dict:
- """Convert a JSON string to dict."""
- if isinstance(json_item, dict):
- return json_item
- if isinstance(json_item, str):
- try:
- return json.loads(json_item)
- except json.JSONDecodeError:
- msg = f"Error decoding key in GeoJSON: {json_item}"
- log.error(msg)
- # Set tags to empty, skip feature
- return {}
-
-
-def insert_features_to_db(
- features: List[Dict[str, Any]],
- conn: psycopg2.extensions.connection,
- extract_type: str,
-) -> None:
- """Insert features into the database with optimized performance.
-
- Args:
- features: List of features containing geometry and properties.
- conn: Database connection object.
- extract_type: Type of data extraction ("custom", "lines", or "extracts").
- """
- ways_poly_batch = []
- ways_line_batch = []
-
- for feature in features:
- geom = shape(feature["geometry"]).wkb_hex
- properties = feature.get("properties", {})
- tags = properties.get("tags", properties)
-
- if tags:
- tags = json_str_to_dict(tags).get("tags", json_str_to_dict(tags))
-
- osm_id = properties.get("osm_id")
-
- # Process based on extract_type
- if extract_type == "custom":
- ways_poly_batch.append({"geom": geom})
- elif extract_type == "lines":
- if any(key in tags for key in ["highway", "waterway", "railway"]):
- ways_line_batch.append({"osm_id": osm_id, "geom": geom, "tags": tags})
- elif extract_type == "extracts":
- if tags.get("building") == "yes":
- ways_poly_batch.append({"osm_id": osm_id, "geom": geom, "tags": tags})
- elif any(key in tags for key in ["highway", "waterway", "railway"]):
- ways_line_batch.append({"osm_id": osm_id, "geom": geom, "tags": tags})
-
- # Perform batch inserts
- if ways_poly_batch:
- insert_geom(conn, "ways_poly", ways_poly_batch)
- if ways_line_batch:
- insert_geom(conn, "ways_line", ways_line_batch)
-
-
def split_by_sql(
aoi: Union[str, FeatureCollection],
db: Union[str, connection],
@@ -658,11 +498,12 @@ def split_by_sql(
default_sql_path = Path(__file__).parent / "fmtm_algorithm.sql"
query = prepare_sql_query(sql_file, default_sql_path)
- parsed_aoi = parse_geojson_input(aoi)
+ parsed_aoi = FMTMSplitter.input_to_geojson(aoi)
aoi_featcol = FMTMSplitter.geojson_to_featcol(parsed_aoi)
+ custom_features = FMTMSplitter.geojson_to_featcol(custom_features)
if len(features := aoi_featcol.get("features", [])) > 1:
- return handle_multiple_aoi_features(
+ return split_multiple_aoi_features(
features, db, sql_file, num_buildings, outfile, custom_features
)
@@ -695,39 +536,7 @@ def split_by_sql(
close_connection(conn)
-def process_features_for_db(
- aoi_featcol: FeatureCollection,
- conn: psycopg2.extensions.connection,
- custom_features: Optional[List[Dict]] = None,
-) -> None:
- """Process custom features or OSM extracts and insert them into the database."""
- if custom_features:
- custom_buildings = parse_geojson_input(custom_features)
- insert_features_to_db(custom_buildings["features"], conn, "custom")
-
- extract_lines = generate_osm_extract(aoi_featcol, "lines")
- if extract_lines:
- insert_features_to_db(extract_lines["features"], conn, "lines")
- else:
- extract_buildings = generate_osm_extract(aoi_featcol, "extracts")
- insert_features_to_db(extract_buildings["features"], conn, "extracts")
-
-
-def setup_lines_view(conn, geometry):
- """Create a database view for line features intersecting the AOI."""
- view_sql = """
- DROP VIEW IF EXISTS lines_view;
- CREATE VIEW lines_view AS
- SELECT tags, geom
- FROM ways_line
- WHERE ST_Intersects(ST_SetSRID(CAST(%s AS GEOMETRY), 4326), geom)
- """
- with conn.cursor() as cur:
- cur.execute(view_sql, (geometry,))
- cur.close()
-
-
-def handle_multiple_aoi_features(
+def split_multiple_aoi_features(
features, db, sql_file, num_buildings, outfile, custom_features
):
"""Handle AOIs with multiple features by splitting them recursively."""
@@ -917,7 +726,7 @@ def main(args_list: list[str] | None = None):
sql_file=args.custom,
num_buildings=args.number,
outfile=args.outfile,
- osm_extract=args.extract,
+ custom_features=args.extract,
)
# Split by feature using geojson
elif args.source and args.source[3:] != "PG:":