Skip to content

Commit

Permalink
Remove bare get_timeinfo, repoint tests to GenericTimeParser
Browse files Browse the repository at this point in the history
  • Loading branch information
marc-white committed Jan 15, 2025
1 parent 1e08b06 commit 085a0a2
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 116 deletions.
114 changes: 0 additions & 114 deletions src/access_nri_intake/source/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,120 +138,6 @@ def _guess_start_end_dates(ts, te, frequency):
return ts, te


def get_timeinfo(
    ds: xr.Dataset,
    filename_frequency: str | None,
    time_dim: str,
) -> tuple[str, str, str]:
    """
    Get start date, end date and frequency of a xarray dataset. Stolen and adapted from the
    cosima cookbook, see
    https://github.com/COSIMA/cosima-cookbook/blob/master/cosima_cookbook/database.py#L565

    Parameters
    ----------
    ds: :py:class:`xarray.Dataset`
        The dataset to parse the time info from
    filename_frequency: str
        Frequency as determined from the filename. It is compared with ``!=`` against
        the frequency derived from the file contents, which is held internally as a
        ``(count, unit)`` tuple.
        NOTE(review): callers presumably pass the same tuple form - confirm.
    time_dim: str
        The name of the time dimension

    Returns
    -------
    start_date: str
        The start date of the dataset, formatted as ``"%Y-%m-%d, %H:%M:%S"``, or
        ``"none"`` if ``time_dim`` is not in the dataset
    end_date: str
        The end date of the dataset, in the same format, or ``"none"``
    frequency: str
        The frequency of the dataset, e.g. ``"1mon"``, ``"3hr"`` or ``"subhr"``;
        ``FREQUENCY_STATIC`` if no frequency could be determined

    Raises
    ------
    EmptyFileError
        If the dataset has a valid unlimited dimension, but no data

    Warns
    -----
    UserWarning
        If ``filename_frequency`` is given and differs from the frequency determined
        from the file contents
    """

    def _todate(t):
        # ``time_var`` is bound in the enclosing scope before this helper is called.
        return cftime.num2date(t, time_var.units, calendar=time_var.calendar)

    time_format = "%Y-%m-%d, %H:%M:%S"
    ts = None
    te = None
    frequency: str | tuple[int | None, str] = FREQUENCY_STATIC
    has_time = time_dim in ds
    # Bound up front so later checks never depend on the ``has_time`` branch having run.
    has_bounds = False

    if has_time:
        time_var = ds[time_dim]

        if len(time_var) == 0:
            raise EmptyFileError(
                "This file has a valid unlimited dimension, but no data"
            )

        has_bounds = hasattr(time_var, "bounds") and time_var.bounds in ds.variables
        if has_bounds:
            # Start of the first bounds interval and end of the last one.
            bounds_var = ds.variables[time_var.bounds]
            ts = _todate(bounds_var[0, 0])
            te = _todate(bounds_var[-1, 1])
        else:
            ts = _todate(time_var[0])
            te = _todate(time_var[-1])

        if len(time_var) > 1 or has_bounds:
            # The frequency is estimated from a single step: the width of the first
            # bounds interval if available, otherwise the gap between the first two
            # time values.
            if has_bounds:
                t1 = _todate(bounds_var[0, 1])
            else:
                t1 = _todate(time_var[1])

            dt = t1 - ts
            # TODO: This is not a very good way to get the frequency
            if dt.days >= 365:
                years = round(dt.days / 365)
                frequency = (years, "yr")
            elif dt.days >= 28:
                months = round(dt.days / 30)
                frequency = (months, "mon")
            elif dt.days >= 1:
                frequency = (dt.days, "day")
            elif dt.seconds >= 3600:
                hours = round(dt.seconds / 3600)
                frequency = (hours, "hr")
            else:
                frequency = (None, "subhr")

    if filename_frequency and filename_frequency != frequency:
        msg = (
            f"The frequency '{filename_frequency}' determined from filename does not "
            f"match the frequency '{frequency}' determined from the file contents."
        )
        # The filename only wins when the file contents gave no frequency at all.
        if frequency == FREQUENCY_STATIC:
            frequency = filename_frequency
        warnings.warn(f"{msg} Using '{frequency}'.")

    # Without bounds the first/last time values are only points, so the start and end
    # dates are adjusted by _guess_start_end_dates using the frequency.
    if has_time and frequency != FREQUENCY_STATIC and not has_bounds:
        ts, te = _guess_start_end_dates(ts, te, frequency)

    start_date = "none" if ts is None else ts.strftime(time_format)
    end_date = "none" if te is None else te.strftime(time_format)

    if isinstance(frequency, str):
        # Already a plain string (FREQUENCY_STATIC or a string filename frequency):
        # return it unchanged rather than indexing into its characters.
        frequency_str = frequency
    elif frequency[0]:
        frequency_str = f"{frequency[0]}{frequency[1]}"
    else:
        frequency_str = frequency[1]

    return start_date, end_date, frequency_str


class GenericTimeParser:
"""
Generic time parser
Expand Down
5 changes: 3 additions & 2 deletions tests/test_source_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
EmptyFileError,
GenericTimeParser,
GfdlTimeParser,
get_timeinfo,
)


Expand Down Expand Up @@ -147,7 +146,9 @@ def test_get_timeinfo(times, bounds, ffreq, expected):
units="days since 1900-01-01 00:00:00", calendar="GREGORIAN"
)

assert get_timeinfo(ds, filename_frequency=ffreq, time_dim="time") == expected
assert (
GenericTimeParser(ds, filename_frequency=ffreq, time_dim="time")() == expected
)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 085a0a2

Please sign in to comment.