Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove bare get_timeinfo, repoint tests to GenericTimeParser #315

Merged
merged 3 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 0 additions & 160 deletions src/access_nri_intake/source/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,166 +92,6 @@ def to_var_info_dict(self) -> dict[str, list[str]]:
}


def _add_month_start(time, n: int):
    """Shift a cftime datetime by ``n`` months and truncate to the start of that month."""
    # Work with a zero-based month index so that divmod yields the number of
    # whole years to carry (negative when stepping back past January) and the
    # zero-based month within the target year.
    carry_years, month_index = divmod(time.month + n - 1, 12)
    return time.replace(
        year=time.year + carry_years,
        month=month_index + 1,
        day=1,
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    )


def _add_year_start(time, n: int):
    """Shift a cftime datetime by ``n`` years and truncate to the start of that year."""
    # Every field below the year is reset, so the result is the first instant
    # of the target year.
    start_of_year = {
        "month": 1,
        "day": 1,
        "hour": 0,
        "minute": 0,
        "second": 0,
        "microsecond": 0,
    }
    return time.replace(year=time.year + n, **start_of_year)


def _guess_start_end_dates(ts, te, frequency):
    """
    Guess the bounded start and end times for a given frequency.

    Parameters
    ----------
    ts:
        First time value (no bounds information available)
    te:
        Last time value (no bounds information available)
    frequency:
        Two-item ``(number, unit)`` pair; ``unit`` is one of "yr", "mon",
        "day" or "hr". Any other unit leaves the times unchanged.

    Returns
    -------
    tuple
        The guessed ``(start, end)`` times
    """
    warnings.warn(
        "Time coordinate does not include bounds information. Guessing "
        "start and end times."
    )
    count, unit = frequency

    if unit in ("yr", "mon"):
        # Calendar units: step back by half the interval (rounded towards
        # zero), forward by the remainder, snapping to the period start.
        shift = _add_year_start if unit == "yr" else _add_month_start
        back = -int(count / 2)
        forward = count + back
        return shift(ts, back), shift(te, forward)

    if unit in ("day", "hr"):
        # Fixed-length units: widen symmetrically by half an interval.
        interval = timedelta(days=count) if unit == "day" else timedelta(hours=count)
        half_interval = interval / 2
        return ts - half_interval, te + half_interval

    warnings.warn("Cannot infer start and end times for subhourly frequencies.")
    return ts, te


def get_timeinfo(
    ds: xr.Dataset,
    filename_frequency: str | None,
    time_dim: str,
) -> tuple[str, str, str]:
    """
    Get start date, end date and frequency of a xarray dataset. Stolen and adapted from the
    cosima cookbook, see
    https://github.com/COSIMA/cosima-cookbook/blob/master/cosima_cookbook/database.py#L565

    Parameters
    ----------
    ds: :py:class:`xarray.Dataset`
        The dataset to parse the time info from
    filename_frequency: str
        Frequency as determined from the filename
    time_dim: str
        The name of the time dimension

    Returns
    -------
    start_date: str
        The start date of the dataset, formatted as "%Y-%m-%d, %H:%M:%S", or
        "none" if no start time could be determined
    end_date: str
        The end date of the dataset, formatted as "%Y-%m-%d, %H:%M:%S", or
        "none" if no end time could be determined
    frequency: str
        The frequency of the dataset

    Raises
    ------
    EmptyFileError
        If the dataset has a valid unlimited dimension, but no data
    """
    # NOTE(review): `filename_frequency` is annotated as `str`, but below it is
    # compared against, and may replace, a `(number, unit)` tuple that is later
    # unpacked/indexed - confirm the type callers actually pass.

    def _todate(t):
        # Decode a numeric time value using the units and calendar attributes
        # of `time_var`, which is bound later in the enclosing function.
        return cftime.num2date(t, time_var.units, calendar=time_var.calendar)

    time_format = "%Y-%m-%d, %H:%M:%S"
    ts = None
    te = None
    frequency: str | tuple[int | None, str] = FREQUENCY_STATIC
    has_time = time_dim in ds

    if has_time:
        time_var = ds[time_dim]

        if len(time_var) == 0:
            raise EmptyFileError(
                "This file has a valid unlimited dimension, but no data"
            )

        # Bounds are usable only if the time variable names a bounds variable
        # and that variable is actually present in the dataset.
        has_bounds = hasattr(time_var, "bounds") and time_var.bounds in ds.variables
        if has_bounds:
            # Start is the first entry of the first bounds row; end is the
            # second entry of the last bounds row.
            bounds_var = ds.variables[time_var.bounds]
            ts = _todate(bounds_var[0, 0])
            te = _todate(bounds_var[-1, 1])
        else:
            ts = _todate(time_var[0])
            te = _todate(time_var[-1])

        # An interval length is available from either the first bounds pair or
        # the first two time values.
        if len(time_var) > 1 or has_bounds:
            if has_bounds:
                t1 = _todate(bounds_var[0, 1])
            else:
                t1 = _todate(time_var[1])

            dt = t1 - ts
            # TODO: This is not a very good way to get the frequency
            # The first interval is bucketed by length: years (365-day units),
            # months (30-day units), days, hours, otherwise sub-hourly.
            if dt.days >= 365:
                years = round(dt.days / 365)
                frequency = (years, "yr")
            elif dt.days >= 28:
                months = round(dt.days / 30)
                frequency = (months, "mon")
            elif dt.days >= 1:
                frequency = (dt.days, "day")
            elif dt.seconds >= 3600:
                hours = round(dt.seconds / 3600)
                frequency = (hours, "hr")
            else:
                frequency = (None, "subhr")

    if filename_frequency:
        if filename_frequency != frequency:
            msg = (
                f"The frequency '{filename_frequency}' determined from filename does not "
                f"match the frequency '{frequency}' determined from the file contents."
            )
            # The filename frequency only wins when nothing could be inferred
            # from the file contents; the mismatch is warned about either way.
            if frequency == FREQUENCY_STATIC:
                frequency = filename_frequency
            warnings.warn(f"{msg} Using '{frequency}'.")

    # `has_bounds` is only defined when `has_time` is true, so it must stay
    # guarded by this condition. `&` is a bitwise AND: both operands are always
    # evaluated (no short-circuit).
    if has_time & (frequency != FREQUENCY_STATIC):
        if not has_bounds:
            ts, te = _guess_start_end_dates(ts, te, frequency)

    if ts is None:
        start_date = "none"
    else:
        start_date = ts.strftime(time_format)

    if te is None:
        end_date = "none"
    else:
        end_date = te.strftime(time_format)

    # Collapse a (number, unit) pair into a single string; a falsy number
    # (e.g. None for sub-hourly) leaves just the unit.
    # NOTE(review): if `frequency` is still a plain string here, the indexing
    # below operates on its characters - confirm this is intended for
    # FREQUENCY_STATIC.
    if frequency[0]:
        frequency = f"{str(frequency[0])}{frequency[1]}"
    else:
        frequency = frequency[1]

    return start_date, end_date, frequency


class GenericTimeParser:
"""
Generic time parser
Expand Down
7 changes: 4 additions & 3 deletions tests/test_source_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
EmptyFileError,
GenericTimeParser,
GfdlTimeParser,
get_timeinfo,
)


Expand Down Expand Up @@ -126,7 +125,7 @@
),
],
)
def test_get_timeinfo(times, bounds, ffreq, expected):
def test_genericparser_get_timeinfo(times, bounds, ffreq, expected):
if bounds:
time = (times[0] + times[1]) / 2
ds = xr.Dataset(
Expand All @@ -147,7 +146,9 @@ def test_get_timeinfo(times, bounds, ffreq, expected):
units="days since 1900-01-01 00:00:00", calendar="GREGORIAN"
)

assert get_timeinfo(ds, filename_frequency=ffreq, time_dim="time") == expected
assert (
GenericTimeParser(ds, filename_frequency=ffreq, time_dim="time")() == expected
)


@pytest.mark.parametrize(
Expand Down
Loading