From ab37010c251943d74581605253e1d38193587ef8 Mon Sep 17 00:00:00 2001 From: Charles Turner <52199577+charles-turner-1@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:18:30 +0800 Subject: [PATCH] Making coordinate variables searchable (#212) - Updated builders to scan for all variables, not just data variables. - Updated tests to include coordinate variable discovery. --- src/access_nri_intake/source/builders.py | 2 +- src/access_nri_intake/source/utils.py | 8 +- tests/test_builders.py | 283 ++++++++++++++++------- 3 files changed, 209 insertions(+), 84 deletions(-) diff --git a/src/access_nri_intake/source/builders.py b/src/access_nri_intake/source/builders.py index 58f85927..ddfcdca7 100644 --- a/src/access_nri_intake/source/builders.py +++ b/src/access_nri_intake/source/builders.py @@ -316,7 +316,7 @@ def parse_access_ncfile( ) as ds: dvars = _VarInfo() - for var in ds.data_vars: + for var in ds.variables: attrs = ds[var].attrs dvars.append_attrs(var, attrs) # type: ignore diff --git a/src/access_nri_intake/source/utils.py b/src/access_nri_intake/source/utils.py index e8def4ed..4d0ab76e 100644 --- a/src/access_nri_intake/source/utils.py +++ b/src/access_nri_intake/source/utils.py @@ -7,7 +7,7 @@ from dataclasses import asdict, dataclass, field from datetime import timedelta from pathlib import Path -from typing import Union +from typing import Optional, Union import cftime import xarray as xr @@ -33,7 +33,7 @@ class _AccessNCFileInfo: filename: Union[str, Path] file_id: str path: str - filename_timestamp: Union[str, None] + filename_timestamp: Optional[str] frequency: str start_date: str end_date: str @@ -139,7 +139,7 @@ def _guess_start_end_dates(ts, te, frequency): def get_timeinfo( ds: xr.Dataset, - filename_frequency: Union[str, None], + filename_frequency: Optional[str], time_dim: str, ) -> tuple[str, str, str]: """ @@ -177,7 +177,7 @@ def _todate(t): time_format = "%Y-%m-%d, %H:%M:%S" ts = None te = None - frequency: Union[str, tuple[Union[int, None], str]] = "fx" + frequency: Union[str, tuple[Optional[int], str]] = "fx" has_time = time_dim in ds if has_time: diff --git a/tests/test_builders.py b/tests/test_builders.py index 4cd6cc1d..c28f9eac 100644 --- a/tests/test_builders.py +++ b/tests/test_builders.py @@ -373,14 +373,16 @@ def test_parse_access_filename(builder, filename, expected): frequency="fx", start_date="none", end_date="none", - variable=["geolat_t", "geolon_t"], + variable=["geolat_t", "geolon_t", "xt_ocean", "yt_ocean"], variable_long_name=[ "tracer latitude", "tracer longitude", + "tcell longitude", + "tcell latitude", ], - variable_standard_name=["", ""], - variable_cell_methods=["time: point", "time: point"], - variable_units=["degrees_N", "degrees_E"], + variable_standard_name=["", "", "", ""], + variable_cell_methods=["time: point", "time: point", "", ""], + variable_units=["degrees_N", "degrees_E", "degrees_E", "degrees_N"], ), ), ( @@ -395,21 +397,41 @@ def test_parse_access_filename(builder, filename, expected): start_date="1900-01-01, 00:00:00", end_date="1910-01-01, 00:00:00", variable=[ + "nv", + "st_ocean", "temp", + "time", "time_bounds", + "xt_ocean", + "yt_ocean", ], variable_long_name=[ + "vertex number", + "tcell zstar depth", "Conservative temperature", + "time", "time axis boundaries", + "tcell longitude", + "tcell latitude", ], variable_standard_name=[ + "", + "", "sea_water_conservative_temperature", "", + "", + "", + "", ], - variable_cell_methods=["time: mean", ""], + variable_cell_methods=["", "", "time: mean", "", "", "", ""], variable_units=[ + "none", + "meters", "K", + "days since 1900-01-01 00:00:00", "days", + "degrees_E", + "degrees_N", ], ), ), @@ -424,19 +446,31 @@ def test_parse_access_filename(builder, filename, expected): frequency="1mon", start_date="1900-01-01, 00:00:00", end_date="1910-01-01, 00:00:00", - variable=["mld", "time_bounds"], + variable=["mld", "nv", "time", "time_bounds", "xt_ocean", "yt_ocean"], variable_long_name=[ "mixed layer depth determined by density criteria", + "vertex number", + "time", "time axis boundaries", + "tcell longitude", + "tcell latitude", ], variable_standard_name=[ "ocean_mixed_layer_thickness_defined_by_sigma_t", "", + "", + "", + "", + "", ], - variable_cell_methods=["time: mean", ""], + variable_cell_methods=["time: mean", "", "", "", "", ""], variable_units=[ "m", + "none", + "days since 1900-01-01 00:00:00", "days", + "degrees_E", + "degrees_N", ], ), ), @@ -451,18 +485,25 @@ def test_parse_access_filename(builder, filename, expected): frequency="1mon", start_date="1900-01-01, 00:00:00", end_date="1900-02-01, 00:00:00", - variable=["mld"], + variable=["mld", "time", "xt_ocean", "yt_ocean"], variable_long_name=[ "mixed layer depth determined by density criteria", + "time", + "tcell longitude", + "tcell latitude", ], variable_standard_name=[ "ocean_mixed_layer_thickness_defined_by_sigma_t", + "", + "", + "", ], - variable_cell_methods=[ - "time: mean", - ], + variable_cell_methods=["time: mean", "", "", ""], variable_units=[ "m", + "days since 1900-01-01 00:00:00", + "degrees_E", + "degrees_N", ], ), ), @@ -477,28 +518,24 @@ def test_parse_access_filename(builder, filename, expected): frequency="1mon", start_date="1900-01-01, 00:00:00", end_date="1900-02-01, 00:00:00", - variable=["TLAT", "TLON", "aice_m", "tarea", "time_bounds"], + variable=["TLAT", "TLON", "aice_m", "tarea", "time", "time_bounds"], variable_long_name=[ "T grid center latitude", "T grid center longitude", "ice area (aggregate)", "area of T grid cells", + "model time", "boundaries for time-averaging interval", ], - variable_standard_name=["", "", "", "", ""], - variable_cell_methods=[ - "", - "", - "time: mean", - "", - "", - ], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], variable_units=[ "degrees_north", "degrees_east", "1", "m^2", "days since 1900-01-01 00:00:00", + "days since 1900-01-01 00:00:00", ], ), ), @@ -531,34 +568,24 @@ def test_parse_access_filename(builder, filename, expected): frequency="1day", start_date="2015-01-01, 00:00:00", end_date="2015-02-01, 00:00:00", - variable=["TLAT", "TLON", "aice", "tarea", "time_bounds"], + variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], variable_long_name=[ "T grid center latitude", "T grid center longitude", "ice area (aggregate)", "area of T grid cells", + "model time", "boundaries for time-averaging interval", ], - variable_standard_name=[ - "", - "", - "", - "", - "", - ], - variable_cell_methods=[ - "", - "", - "time: mean", - "", - "", - ], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], variable_units=[ "degrees_north", "degrees_east", "1", "m^2", "days since 1850-01-01 00:00:00", + "days since 1850-01-01 00:00:00", ], ), ), @@ -573,22 +600,24 @@ def test_parse_access_filename(builder, filename, expected): frequency="1day", start_date="2015-01-01, 00:00:00", end_date="2015-07-01, 00:00:00", - variable=[ - "sst", - "time_bounds", - ], + variable=["nv", "sst", "time", "time_bounds", "xt_ocean", "yt_ocean"], variable_long_name=[ + "vertex number", "Potential temperature", + "time", "time axis boundaries", + "tcell longitude", + "tcell latitude", ], - variable_standard_name=["sea_surface_temperature", ""], - variable_cell_methods=[ - "time: mean", - "", - ], + variable_standard_name=["", "sea_surface_temperature", "", "", "", ""], + variable_cell_methods=["", "time: mean", "", "", "", ""], variable_units=[ + "none", "K", + "days since 1850-01-01 00:00:00", "days", + "degrees_E", + "degrees_N", ], ), ), @@ -604,20 +633,32 @@ def test_parse_access_filename(builder, filename, expected): start_date="2015-01-01, 00:00:00", end_date="2015-07-01, 00:00:00", variable=[ + "nv", + "scalar_axis", "temp_global_ave", + "time", "time_bounds", ], variable_long_name=[ + "vertex number", + "none", "Global mean temp in liquid seawater", + "time", "time axis boundaries", ], variable_standard_name=[ + "", + "", "sea_water_potential_temperature", "", + "", ], - variable_cell_methods=["time: mean", ""], + variable_cell_methods=["", "", "time: mean", "", ""], variable_units=[ + "none", + "none", "deg_C", + "days since 1850-01-01 00:00:00", "days", ], ), @@ -651,28 +692,24 @@ def test_parse_access_filename(builder, filename, expected): frequency="1mon", start_date="1850-01-01, 00:00:00", end_date="1850-02-01, 00:00:00", - variable=["TLAT", "TLON", "aice", "tarea", "time_bounds"], + variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], variable_long_name=[ "T grid center latitude", "T grid center longitude", "ice area (aggregate)", "area of T grid cells", + "model time", "boundaries for time-averaging interval", ], - variable_standard_name=[ - "", - "", - "", - "", - "", - ], - variable_cell_methods=["", "", "time: mean", "", ""], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], variable_units=[ "degrees_north", "degrees_east", "1", "m^2", "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", ], ), ), @@ -689,20 +726,29 @@ def test_parse_access_filename(builder, filename, expected): end_date="1850-12-30, 00:00:00", variable=[ "fgco2_raw", + "nv", + "time", "time_bounds", + "xt_ocean", + "yt_ocean", ], variable_long_name=[ "Flux into ocean - DIC, inc. anth.", + "vertex number", + "time", "time axis boundaries", + "tcell longitude", + "tcell latitude", ], - variable_standard_name=[ - "", - "", - ], - variable_cell_methods=["time: mean", ""], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["time: mean", "", "", "", "", ""], variable_units=[ "mmol/m^2/s", + "none", + "days since 0001-01-01 00:00:00", "days", + "degrees_E", + "degrees_N", ], ), ), @@ -718,21 +764,33 @@ def test_parse_access_filename(builder, filename, expected): start_date="1849-12-30, 00:00:00", end_date="1850-12-30, 00:00:00", variable=[ + "nv", "o2", + "st_ocean", + "time", "time_bounds", + "xt_ocean", + "yt_ocean", ], variable_long_name=[ + "vertex number", "o2", + "tcell zstar depth", + "time", "time axis boundaries", + "tcell longitude", + "tcell latitude", ], - variable_standard_name=["", ""], - variable_cell_methods=[ - "time: mean", - "", - ], + variable_standard_name=["", "", "", "", "", "", ""], + variable_cell_methods=["", "time: mean", "", "", "", "", ""], variable_units=[ + "none", "mmol/m^3", + "meters", + "days since 0001-01-01 00:00:00", "days", + "degrees_E", + "degrees_N", ], ), ), @@ -751,36 +809,61 @@ def test_parse_access_filename(builder, filename, expected): "average_DT", "average_T1", "average_T2", + "nv", "thetao", + "time", "time_bnds", + "xh", + "yh", + "zl", ], variable_long_name=[ "Length of average period", "Start time for average period", "End time for average period", + "vertex number", "Sea Water Potential Temperature", + "time", "time axis boundaries", + "h point nominal longitude", + "h point nominal latitude", + "Layer pseudo-depth, -z*", ], variable_standard_name=[ "", "", "", + "", "sea_water_potential_temperature", "", + "", + "", + "", + "", ], variable_cell_methods=[ "", "", "", + "", "area:mean zl:mean yh:mean xh:mean time: mean", "", + "", + "", + "", + "", ], variable_units=[ "days", "days since 0001-01-01 00:00:00", "days since 0001-01-01 00:00:00", + "", "degC", "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degrees_east", + "degrees_north", + "meter", ], ), ), @@ -799,36 +882,56 @@ def test_parse_access_filename(builder, filename, expected): "average_DT", "average_T1", "average_T2", + "nv", + "time", "time_bnds", "tos", + "xh", + "yh", ], variable_long_name=[ "Length of average period", "Start time for average period", "End time for average period", + "vertex number", + "time", "time axis boundaries", "Sea Surface Temperature", + "h point nominal longitude", + "h point nominal latitude", ], variable_standard_name=[ "", "", "", "", + "", + "", "sea_surface_temperature", + "", + "", ], variable_cell_methods=[ "", "", "", "", + "", + "", "area:mean yh:mean xh:mean time: mean", + "", + "", ], variable_units=[ "days", "days since 0001-01-01 00:00:00", "days since 0001-01-01 00:00:00", + "", + "days since 0001-01-01 00:00:00", "days since 0001-01-01 00:00:00", "degC", + "degrees_east", + "degrees_north", ], ), ), @@ -843,19 +946,20 @@ def test_parse_access_filename(builder, filename, expected): frequency="fx", start_date="none", end_date="none", - variable=["geolat", "geolon"], + variable=["geolat", "geolon", "xh", "yh"], variable_long_name=[ "Latitude of tracer (T) points", "Longitude of tracer (T) points", + "h point nominal longitude", + "h point nominal latitude", ], - variable_standard_name=[ - "", - "", - ], - variable_cell_methods=["time: point", "time: point"], + variable_standard_name=["", "", "", ""], + variable_cell_methods=["time: point", "time: point", "", ""], variable_units=[ "degrees_north", "degrees_east", + "degrees_east", + "degrees_north", ], ), ), @@ -874,36 +978,61 @@ def test_parse_access_filename(builder, filename, expected): "average_DT", "average_T1", "average_T2", + "nv", "thetao", + "time", "time_bnds", + "xh", + "yh", + "z_l", ], variable_long_name=[ "Length of average period", "Start time for average period", "End time for average period", + "vertex number", "Sea Water Potential Temperature", + "time", "time axis boundaries", + "h point nominal longitude", + "h point nominal latitude", + "Depth at cell center", ], variable_standard_name=[ "", "", "", + "", "sea_water_potential_temperature", "", + "", + "", + "", + "", ], variable_cell_methods=[ "", "", "", + "", "area:mean z_l:mean yh:mean xh:mean time: mean", "", + "", + "", + "", + "", ], variable_units=[ "days", "days since 0001-01-01 00:00:00", "days since 0001-01-01 00:00:00", + "", "degC", "days since 0001-01-01 00:00:00", + "days since 0001-01-01 00:00:00", + "degrees_east", + "degrees_north", + "meters", ], ), ), @@ -918,28 +1047,24 @@ def test_parse_access_filename(builder, filename, expected): frequency="1day", start_date="1900-01-01, 00:00:00", end_date="1900-01-02, 00:00:00", - variable=["TLAT", "TLON", "aice", "tarea", "time_bounds"], + variable=["TLAT", "TLON", "aice", "tarea", "time", "time_bounds"], variable_long_name=[ "T grid center latitude", "T grid center longitude", "ice area (aggregate)", "area of T grid cells", + "time", "time interval endpoints", ], - variable_standard_name=[ - "", - "", - "", - "", - "", - ], - variable_cell_methods=["", "", "time: mean", "", ""], + variable_standard_name=["", "", "", "", "", ""], + variable_cell_methods=["", "", "time: mean", "", "", ""], variable_units=[ "degrees_north", "degrees_east", "1", "m^2", "days since 0000-01-01 00:00:00", + "days since 0000-01-01 00:00:00", ], ), ),