Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove bare get_timeinfo, repoint tests to GenericTimeParser #315

Merged
merged 3 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 0 additions & 114 deletions src/access_nri_intake/source/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,120 +138,6 @@ def _guess_start_end_dates(ts, te, frequency):
return ts, te


def _frequency_from_timedelta(dt) -> tuple[int | None, str]:
    """Map the spacing between two time points onto a ``(count, unit)`` frequency."""
    # TODO: This is not a very good way to get the frequency
    if dt.days >= 365:
        return round(dt.days / 365), "yr"
    if dt.days >= 28:
        return round(dt.days / 30), "mon"
    if dt.days >= 1:
        return dt.days, "day"
    if dt.seconds >= 3600:
        return round(dt.seconds / 3600), "hr"
    return None, "subhr"


def get_timeinfo(
    ds: xr.Dataset,
    filename_frequency: str | None,
    time_dim: str,
) -> tuple[str, str, str]:
    """
    Get start date, end date and frequency of a xarray dataset. Stolen and adapted from the
    cosima cookbook, see
    https://github.com/COSIMA/cosima-cookbook/blob/master/cosima_cookbook/database.py#L565

    If ``time_dim`` is not present in the dataset, no dates are derived and the
    frequency falls back to ``FREQUENCY_STATIC`` (or to ``filename_frequency``
    when one is supplied, in which case a warning is emitted).

    Parameters
    ----------
    ds: :py:class:`xarray.Dataset`
        The dataset to parse the time info from
    filename_frequency: str or None
        Frequency as determined from the filename. A falsy value disables the
        comparison against the frequency derived from the file contents.
        NOTE(review): annotated as ``str`` but it is compared against, and may
        replace, a ``(count, unit)`` tuple that is later indexed - confirm the
        expected type against callers.
    time_dim: str
        The name of the time dimension

    Returns
    -------
    start_date: str
        The start date of the dataset, formatted as ``%Y-%m-%d, %H:%M:%S``, or
        ``"none"`` if no start date could be determined
    end_date: str
        The end date of the dataset, formatted as ``%Y-%m-%d, %H:%M:%S``, or
        ``"none"`` if no end date could be determined
    frequency: str
        The frequency of the dataset

    Raises
    ------
    EmptyFileError
        If the dataset has a valid unlimited dimension, but no data

    Warns
    -----
    UserWarning
        If ``filename_frequency`` is given and differs from the frequency
        determined from the file contents
    """

    def _todate(t):
        # Reads ``time_var`` from the enclosing scope; only called once it is bound.
        return cftime.num2date(t, time_var.units, calendar=time_var.calendar)

    time_format = "%Y-%m-%d, %H:%M:%S"
    ts = None
    te = None
    frequency: str | tuple[int | None, str] = FREQUENCY_STATIC
    has_time = time_dim in ds
    # Bound unconditionally so later checks never depend on which branch ran.
    has_bounds = False

    if has_time:
        time_var = ds[time_dim]

        if len(time_var) == 0:
            raise EmptyFileError(
                "This file has a valid unlimited dimension, but no data"
            )

        has_bounds = hasattr(time_var, "bounds") and time_var.bounds in ds.variables
        if has_bounds:
            # Bounds give the true extent of the first and last intervals.
            bounds_var = ds.variables[time_var.bounds]
            ts = _todate(bounds_var[0, 0])
            te = _todate(bounds_var[-1, 1])
        else:
            ts = _todate(time_var[0])
            te = _todate(time_var[-1])

        # A frequency needs two time points: either the two edges of the first
        # bounds interval, or the first two entries of the time axis.
        if len(time_var) > 1 or has_bounds:
            if has_bounds:
                t1 = _todate(bounds_var[0, 1])
            else:
                t1 = _todate(time_var[1])

            frequency = _frequency_from_timedelta(t1 - ts)

    if filename_frequency and filename_frequency != frequency:
        # The message must be built before ``frequency`` is possibly replaced
        # so that it reports the value derived from the file contents.
        msg = (
            f"The frequency '{filename_frequency}' determined from filename does not "
            f"match the frequency '{frequency}' determined from the file contents."
        )
        if frequency == FREQUENCY_STATIC:
            frequency = filename_frequency
        warnings.warn(f"{msg} Using '{frequency}'.")

    # Without bounds the raw time points are adjusted via the frequency.
    if has_time and frequency != FREQUENCY_STATIC and not has_bounds:
        ts, te = _guess_start_end_dates(ts, te, frequency)

    start_date = "none" if ts is None else ts.strftime(time_format)
    end_date = "none" if te is None else te.strftime(time_format)

    # Collapse ``(count, unit)`` into a single string; a falsy count is dropped.
    if frequency[0]:
        frequency = f"{str(frequency[0])}{frequency[1]}"
    else:
        frequency = frequency[1]

    return start_date, end_date, frequency


class GenericTimeParser:
"""
Generic time parser
Expand Down
7 changes: 4 additions & 3 deletions tests/test_source_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
EmptyFileError,
GenericTimeParser,
GfdlTimeParser,
get_timeinfo,
)


Expand Down Expand Up @@ -126,7 +125,7 @@
),
],
)
def test_get_timeinfo(times, bounds, ffreq, expected):
def test_genericparser_get_timeinfo(times, bounds, ffreq, expected):
if bounds:
time = (times[0] + times[1]) / 2
ds = xr.Dataset(
Expand All @@ -147,7 +146,9 @@ def test_get_timeinfo(times, bounds, ffreq, expected):
units="days since 1900-01-01 00:00:00", calendar="GREGORIAN"
)

assert get_timeinfo(ds, filename_frequency=ffreq, time_dim="time") == expected
assert (
GenericTimeParser(ds, filename_frequency=ffreq, time_dim="time")() == expected
)


@pytest.mark.parametrize(
Expand Down
Loading