Skip to content

Commit

Permalink
Addressed comments in PR #37 (75c19843f61d524d9257b8cad60a33ae5baff7e…
Browse files Browse the repository at this point in the history
…ee6a9fa7b89a0ab5971db9357)
  • Loading branch information
Jack-Hayes committed Dec 18, 2024
1 parent baafc3f commit 9c5b1ea
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 159 deletions.
249 changes: 137 additions & 112 deletions docs/examples/additional_lidar.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/examples/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ quickstart
cascading_search
sliderule
contextual_data
additional_lidar
```
21 changes: 12 additions & 9 deletions docs/user_guide/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@ Below we provide a short table summarizing datasets that are searchable with
different providers, the _provider_ column identifies the source of the data
used by this library.

| Dataset | Alias | Type | Start | End | Extent | Source |
| -------------- | ---------- | --------- | ---------- | ---------- | ------------- | --------------------------------------------------------------------------- |
| TanDEM-X | tdx | SAR | 2007-07-01 | | global | [NASA CSDAP](https://csdap.earthdata.nasa.gov/stac/collections/airbus) |
| Maxar Stereo | maxar | VHR | 2007-07-01 | | global | [Maxar](https://developers.maxar.com/docs/discovery/) |
| Copernicus DEM | cop30      | SAR       | 2021-04-22 |            | global        | [Microsoft](https://planetarycomputer.microsoft.com/dataset/cop-dem-glo-30) |
| ICESat-2 ATL06 | atl06 | Altimeter | 2018-10-13 | | global | [NASA](https://nsidc.org/data/atl03) |
| GEDI L2A | gedi | Altimeter | 2019-04-04 | 2023-03-17 | mid-latitudes | [NASA](https://lpdaac.usgs.gov/products/gedi02_av002/) |
| 3DEP LiDAR | 3dep | LiDAR | 2000-12-01 | | CONUS | [USGS](https://www.usgs.gov/3d-elevation-program) |
| ESA WorldCover | worldcover | LULC | 2020-01-01 | 2021-12-31 | global | [Microsoft](https://planetarycomputer.microsoft.com/dataset/esa-worldcover) |
| Dataset | Alias | Type | Start | End | Extent | Source |
| ------------------ | ---------- | --------- | ---------- | ---------- | -------------- | --------------------------------------------------------------------------- |
| TanDEM-X | tdx | SAR | 2007-07-01 | | global | [NASA CSDAP](https://csdap.earthdata.nasa.gov/stac/collections/airbus) |
| Maxar Stereo | maxar | VHR | 2007-07-01 | | global | [Maxar](https://developers.maxar.com/docs/discovery/) |
| Copernicus DEM     | cop30      | SAR       | 2021-04-22 |            | global         | [Microsoft](https://planetarycomputer.microsoft.com/dataset/cop-dem-glo-30) |
| ICESat-2 ATL06 | atl06 | Altimeter | 2018-10-13 | | global | [NASA](https://nsidc.org/data/atl03) |
| GEDI L2A | gedi | Altimeter | 2019-04-04 | 2023-03-17 | mid-latitudes | [NASA](https://lpdaac.usgs.gov/products/gedi02_av002/) |
| 3DEP LiDAR | 3dep | LiDAR | 2000-12-01 | | CONUS | [USGS](https://www.usgs.gov/3d-elevation-program) |
| ESA WorldCover | worldcover | LULC | 2020-01-01 | 2021-12-31 | global | [Microsoft](https://planetarycomputer.microsoft.com/dataset/esa-worldcover) |
| NOAA Coastal LiDAR | noaa | LiDAR | 1996-10-09 | | US Territories | [NOAA](https://coast.noaa.gov/digitalcoast/data/coastallidar.html) |
| NCALM LiDAR | ncalm | LiDAR | 2003-05-15 | | US Territories | [NCALM](https://calm.geo.berkeley.edu/ncalm/dtc.html) |
| NEON LiDAR | neon | LiDAR | 2013-06-01 | | US Territories | [NEON](https://data.neonscience.org/data-products/DP3.30024.001) |

## Other data sources

Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ dev = [
"pytest >=6",
"pytest-cov >=3",
"sliderule>=4.7.1,<5",
"requests>=2.32.3,<3",
"types-requests>=2.32.0.20241016,<3"
]
docs = [
Expand Down
18 changes: 6 additions & 12 deletions src/coincident/search/neon_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,13 @@

from __future__ import annotations

import warnings

import geopandas as gpd
import pandas as pd
import pyogrio
import requests
from pandas import Timestamp
from shapely.geometry import Point

warnings.filterwarnings(
"ignore", message=".*Geometry is in a geographic CRS.*", category=UserWarning
)


def build_neon_point_gf(sites_url: str) -> gpd.GeoDataFrame:
"""
Expand Down Expand Up @@ -129,7 +123,7 @@ def temporal_filter_neon(
].reset_index(drop=True)


def get_neon_bboxes(url: str | None, fallback_geometry: gpd.GeoSeries) -> gpd.GeoSeries:
def get_neon_bboxes(url: str, fallback_geometry: gpd.GeoSeries) -> gpd.GeoSeries:
"""
Fetch and return bounding boxes for NEON data products.
Expand Down Expand Up @@ -181,17 +175,17 @@ def search_bboxes(
search_end: Timestamp,
) -> gpd.GeoDataFrame:
"""
Perform a search for NEON metadata and respective bbox footprints. Note that this search
Perform a search for NEON LiDAR metadata and respective bbox footprints. Note that this search
will take a while if you denote a large aoi or a large time range.
Parameters
----------
intersects : gpd.GeoDataFrame | gpd.GeoSeries
The geometry to restrict the search.
The geometry to restrict the search. By default does a global search.
search_start : pd.Timestamp
The start of the time range to filter by.
The start of the time range to filter by. By default searches all dates in the catalog (not recommended for performance reasons).
search_end : pd.Timestamp
The end of the time range to filter by.
The end of the time range to filter by. By default searches all dates in the catalog (not recommended for performance reasons).
Returns
-------
Expand All @@ -200,7 +194,7 @@ def search_bboxes(
"""
if search_start is None and search_end is None:
search_start = pd.Timestamp(
"2005-06-01"
"2013-06-01"
) # Starting from June, 2013 (first NEON dataset)
search_end = pd.Timestamp.today() # Default to today's date
# note that the above will result in the search taking a very long time
Expand Down
35 changes: 20 additions & 15 deletions src/coincident/search/opentopo_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ def search_ncalm_noaa(
dataset: str | None = None,
) -> gpd.GeoDataFrame:
"""
Perform a search for geospatial data using OpenTopography API.
This function dynamically adjusts the API URL based on the dataset.
Perform a search for NCALM LiDAR or NOAA Coastal LiDAR footprints and metadata via
the OpenTopography API.
Parameters
----------
aoi : gpd.GeoDataFrame | gpd.GeoSeries
A GeoDataFrame or GeoSeries containing a geometry to restrict the search area.
A GeoDataFrame or GeoSeries containing a geometry to restrict the search area, by default does a global search.
search_start : Timestamp, optional
The start datetime for the search, by default None.
The start datetime for the search, by default searches the entire catalog defined by 'dataset'.
search_end : Timestamp, optional
The end datetime for the search, by default None.
The end datetime for the search, by default searches the entire catalog defined by 'dataset'.
dataset : str
The dataset type (either "noaa" or "ncalm").
Expand Down Expand Up @@ -68,25 +68,30 @@ def search_ncalm_noaa(
]
)
else:
# convex_hull works better than simplify for more-complex geometry (ie. Louisiana)
# convex_hull works better than simplify for more-complex geometries (i.e., Louisiana)
# https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/LA/shape.geojson
search_poly = aoi.to_crs(4326).union_all()
search_poly_chull = search_poly.convex_hull
coords = ",".join([f"{x},{y}" for x, y in search_poly_chull.exterior.coords])

# alter the API URL based on the dataset
if dataset == "noaa":
url = f"https://portal.opentopography.org/API/otCatalog?productFormat=PointCloud&polygon={coords}&detail=true&outputFormat=json&include_federated=true"
elif dataset == "ncalm":
url = f"https://portal.opentopography.org/API/otCatalog?productFormat=PointCloud&polygon={coords}&detail=true&outputFormat=json&include_federated=false"
else:
if dataset not in ["noaa", "ncalm"]:
msg = f"Unsupported dataset: {dataset}"
raise ValueError(msg)

response = requests.get(url)
# https://requests.readthedocs.io/en/latest/user/quickstart/#passing-parameters-in-urls
url_api_base = "https://portal.opentopography.org/API/otCatalog"
params_api = {
"productFormat": "PointCloud",
"detail": "true",
"outputFormat": "json",
"polygon": coords,
"include_federated": "true" if dataset == "noaa" else "false",
}

response = requests.get(url_api_base, params=params_api)
if response.status_code != 200:
msg = f"Error querying OpenTopography API: {response.status_code}"
raise ValueError(msg)
msg_response = f"Error querying OpenTopography API: {response.status_code}"
raise ValueError(msg_response)

catalog = response.json()
if dataset == "noaa":
Expand Down
42 changes: 32 additions & 10 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import geopandas as gpd
import pytest
from geopandas.testing import assert_geodataframe_equal
from shapely.geometry import Polygon

import coincident

Expand Down Expand Up @@ -187,33 +188,54 @@ def test_swath_polygon_not_found():

# opentopo (NCALM and NOAA)
# =======
# TODO: remove datetime argument constraint for opentopo search
# also, smaller aois
@network
def test_noaa_search(bathy_aoi):
gf = coincident.search.search(
dataset="noaa", intersects=bathy_aoi, datetime=["2019-01-01", "2023-12-31"]
)
assert len(gf) == 2
assert gf.shape == (2, 5)
assert all(
col in gf.columns
for col in ["id", "title", "start_datetime", "end_datetime", "geometry"]
)
assert all(isinstance(geom, Polygon) for geom in gf["geometry"])


@network
def test_ncalm_search(large_aoi):
gf = coincident.search.search(
dataset="ncalm", intersects=large_aoi, datetime=["2019-01-01", "2023-12-31"]
)
assert len(gf) == 6
assert gf.shape == (6, 5)
assert all(
col in gf.columns
for col in ["id", "title", "start_datetime", "end_datetime", "geometry"]
)
assert all(isinstance(geom, Polygon) for geom in gf["geometry"])


# NEON
# TODO: use a smaller aoi for search
# i don't want to keep adding fixtures to conftest.py but i think the test takes way too long with CO
# and no sites overlap with grandmesa
# TODO: add a test for provisional NEON datasets
# =======
@network
@pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS:UserWarning")
def test_neon_search(large_aoi):
def test_neon_search():
intersects = gpd.read_file(
"https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/MA/shape.geojson"
)
gf = coincident.search.search(
dataset="neon", intersects=large_aoi, datetime=["2022-01-01", "2022-12-31"]
dataset="neon", intersects=intersects, datetime=["2019"]
)
assert gf.shape == (2, 6)
assert all(
col in gf.columns
for col in [
"id",
"title",
"start_datetime",
"end_datetime",
"product_url",
"geometry",
]
)
assert len(gf) == 3
assert all(isinstance(geom, Polygon) for geom in gf["geometry"])

0 comments on commit 9c5b1ea

Please sign in to comment.