"""Lilio's main Calendar module."""
import copy
import re
import warnings
from os import linesep
from typing import Literal
from typing import Union
import pandas as pd
import xarray as xr
from pandas.tseries.offsets import DateOffset
from lilio import _plot
from lilio import utils
_MappingYears = tuple[Literal["years"], int, int]
_MappingData = tuple[Literal["data"], pd.Timestamp, pd.Timestamp]
class Interval:
    """Basic construction element of calendar for defining precursors and targets."""

    def __init__(
        self,
        role: Literal["target", "precursor"],
        length: Union[str, dict],
        gap: Union[str, dict] = "0d",
    ) -> None:
        """Construct the basic element of the calendar.

        The Interval is characterised by its type (either target or precursor), its
        length and the gap between it and the previous interval of its type (or the
        anchor date, if the interval is the first target/first precursor).

        Args:
            role: The type of interval. Either "target" or "precursor".
            length: The length of the interval. This can either be a pandas-like
                frequency string (e.g. "10d", "2W", or "3M"), or a pandas.DateOffset
                compatible dictionary such as {"days": 10}, {"weeks": 2}, or
                {"months": 1, "weeks": 2}.
            gap: The gap between the previous interval and this interval. Valid inputs
                are the same as the length keyword argument. Defaults to "0d".

        Example:
            >>> from lilio import Interval
            >>> iv = Interval("target", length="7d")
            >>> iv
            Interval(role='target', length='7d', gap='0d')

            You can modify the interval's properties in-place:

            >>> iv.gap = "1W"
            >>> iv
            Interval(role='target', length='7d', gap='1W')
        """
        # Declared for type checkers; the length/gap setters below assign them.
        self._length_dateoffset: pd.DateOffset
        self._gap_dateoffset: pd.DateOffset

        self.length = length
        self.gap = gap
        self._role = role
        # Any role other than the exact string "target" is treated as non-target.
        self._target = role == "target"
        # TODO: support lead_time

    @staticmethod
    def _to_dateoffset(value: Union[str, dict]) -> pd.DateOffset:
        """Convert a frequency string or a keyword dictionary to a DateOffset.

        Strings are first turned into a keyword dictionary by
        ``utils.parse_freqstr_to_dateoffset``; dictionaries are passed to
        ``DateOffset`` as keyword arguments unchanged.
        """
        if isinstance(value, str):
            value = utils.parse_freqstr_to_dateoffset(value)
        return DateOffset(**value)

    @property
    def is_target(self):
        """Return whether this Interval is a target interval."""
        return self._target

    @property
    def role(self):
        """Return the type of interval, as passed to the constructor."""
        return self._role

    @property
    def length(self):
        """Return the length of the interval, as it was set (string or dict)."""
        return self._length

    @length.setter
    def length(self, value: Union[str, dict]):
        # Convert before storing anything, so that an invalid value raises without
        # leaving the raw value and the cached DateOffset out of sync.
        offset = self._to_dateoffset(value)
        self._length = value
        self._length_dateoffset = offset

    @property
    def length_dateoffset(self):
        """Return the length property as a dateoffset."""
        return self._length_dateoffset

    @property
    def gap(self):
        """Return the gap of the interval, as it was set (string or dict)."""
        return self._gap

    @gap.setter
    def gap(self, value: Union[str, dict]):
        # Same ordering as the length setter: validate first, then commit both.
        offset = self._to_dateoffset(value)
        self._gap = value
        self._gap_dateoffset = offset

    @property
    def gap_dateoffset(self):
        """Get the gap property as a dateoffset."""
        return self._gap_dateoffset

    def __repr__(self):
        """Return a string representation of the Interval class."""
        props = [
            ("role", self.role),
            ("length", self.length),
            ("gap", self.gap),
        ]
        propstr = ", ".join(f"{key}={value!r}" for key, value in props)
        return f"{self.__class__.__name__}({propstr})"
class Calendar:
    """Build a calendar from scratch with basic construction elements."""

    def __init__(
        self,
        anchor: str,
        allow_overlap: bool = False,
        mapping: Union[
            None,
            _MappingYears,
            _MappingData,
        ] = None,
        intervals: Union[None, list[Interval]] = None,
    ):
        """Instantiate a basic container for building calendar using basic blocks.

        This is a highly flexible calendar which allows the user to build their own
        calendar with the basic building blocks of target and precursor periods.

        Users have the freedom to create calendar with customized intervals, gap
        between intervals, and even overlapped intervals. They need to manage the
        calendar themselves.

        Some shorthand calendars, such as a `daily_calendar`, `weekly_calendar` and
        `monthly_calendar` are available in lilio.calendar_shorthands. These can be used
        to easily construct basic calendars with only a few parameters, but do not have
        the flexibility that this calendar builder module provides.

        Args:
            anchor: String denoting the anchor date. The following inputs are valid:
                    - "MM-DD" for a month and day. E.g. "12-31".
                    - "MM" for only a month, e.g. "4" for April.
                    - English names and abbreviations of months. E.g. "December" or
                      "jan".
                    - "Www" for a week number, e.g. "W05" for the fifth week of the
                      year.
                    - "Www-D" for a week number plus day of week. E.g. "W01-4" for the
                      first thursday of the year.
            allow_overlap: If overlapping intervals between years is allowed or not.
                Default behaviour is False, which means that anchor years will be
                skipped to avoid data being shared between anchor years.
            mapping: Calendar mapping. Input in the form: ("years", 2000, 2020) or
                ("data", pd.Timestamp("2000-01-01"), pd.Timestamp("2020-01-01")). The
                calendar mapping is usually set with the `map_years` or `map_to_data`
                methods.
            intervals: A list of Interval objects that should be appended to the
                calendar when it is initialized.

        Example:
            Instantiate a custom calendar and appending target/precursor periods.

            >>> import lilio
            >>> calendar = lilio.Calendar(anchor="12-31")
            >>> calendar # doctest: +NORMALIZE_WHITESPACE
            Calendar(
                anchor='12-31',
                allow_overlap=False,
                mapping=None,
                intervals=None
            )
        """
        self._anchor, self._anchor_fmt = self._parse_anchor(anchor)
        self._allow_overlap = allow_overlap
        self.targets: list[Interval] = []
        self.precursors: list[Interval] = []

        # Mapping state. Everything starts unset; _set_mapping (below), map_years
        # and map_to_data fill in whichever pair belongs to the chosen mapping.
        self._first_year: Union[None, int] = None
        self._last_year: Union[None, int] = None
        self._first_timestamp: Union[None, pd.Timestamp] = None
        self._last_timestamp: Union[None, pd.Timestamp] = None

        if intervals is not None:
            for interval in intervals:
                self._append(interval)

        self._mapping: Union[None, Literal["years", "data"]]
        self._set_mapping(mapping)

    @property
    def n_targets(self) -> int:
        """Return the number of targets."""
        return len(self.targets)

    @property
    def n_precursors(self) -> int:
        """Return the number of precursors."""
        return len(self.precursors)

    @property
    def anchor(self):
        """Return the anchor."""
        return self._anchor

    @anchor.setter
    def anchor(self, value):
        self._anchor, self._anchor_fmt = self._parse_anchor(value)

    @property
    def allow_overlap(self):
        """Return allow_overlap: if overlapping intervals are allowed or not."""
        return self._allow_overlap

    @allow_overlap.setter
    def allow_overlap(self, value: bool):
        if not isinstance(value, bool):
            raise ValueError(
                f"allow_overlap should be either True or False, not {value} "
                f"of type {type(value)}"
            )
        self._allow_overlap = value

    @property
    def mapping(self) -> Union[None, Literal["years", "data"]]:
        """Return the mapping of the calendar. Either None, "years", or "data"."""
        return self._mapping

    def add_intervals(
        self,
        role: Literal["target", "precursor"],
        length: Union[str, dict],
        gap: Union[str, dict] = "0d",
        n: int = 1,
    ) -> None:
        """Add one or more intervals to the calendar.

        The interval can be a target or a precursor, and can be defined by its length,
        a possible gap between this interval and the preceding interval.

        Args:
            role: Either a 'target' or 'precursor' interval(s).
            length: The length of the interval(s), in a format of '5d' for five days,
                '2W' for two weeks, or '1M' for one month. The value is passed on to
                Interval unchanged.
            gap: The gap between this interval and the preceding target/precursor
                interval. Same format as the length argument.
            n: The number of intervals which should be added to the calendar. Defaults
                to 1.

        Raises:
            ValueError: If n is not an int, if n is smaller than 1, or if role is
                neither 'target' nor 'precursor'.
        """
        if not isinstance(n, int):
            raise ValueError(
                "Please input an 'int' type for the 'n' argument." f" Not a {type(n)}."
            )
        if n <= 0:
            raise ValueError(
                "The number of intervals 'n' has to be 1 or greater, " f"not '{n}'."
            )
        if role not in ["target", "precursor"]:
            raise ValueError(
                f"Type '{role}' is not a valid interval type. Please "
                "choose between 'target' and 'precursor'"
            )

        for _ in range(n):
            self._append(Interval(role, length, gap))

    def _get_anchor(self, year: int) -> pd.Timestamp:
        """Generate the anchor timestamp of this calendar for a specific year.

        The year is prepended to the stored anchor string and the result is parsed
        with the stored anchor format (prefixed with "%Y-").

        Args:
            year (int): anchor year for which the anchor timestamp should be generated

        Returns:
            pd.Timestamp: Timestamp of the anchor date in the given year.
        """
        return pd.to_datetime(
            f"{year}-" + self._anchor, format="%Y-" + self._anchor_fmt
        )

    def _parse_anchor(self, anchor_str: str) -> tuple[str, str]:
        """Parse the user-input anchor.

        Args:
            anchor_str: Anchor string in the right formatting.

        Returns:
            Tuple of the (possibly normalized) anchor string and the datetime format
            string with which it can be parsed into a date.

        Raises:
            ValueError: If the input is not a string, or matches none of the
                supported formats.
        """
        if not isinstance(anchor_str, str):
            raise ValueError("Anchor input must be a string with expected format.")

        if re.fullmatch(r"\d{1,2}-\d{1,2}", anchor_str):
            # "MM-DD"
            utils.check_month_day(*[int(x) for x in anchor_str.split("-")])
            fmt = "%m-%d"
        elif re.fullmatch(r"\d{1,2}", anchor_str):
            # "MM"
            utils.check_month_day(int(anchor_str))
            fmt = "%m"
        elif re.fullmatch(r"W\d{1,2}-\d", anchor_str):
            # "Www-D"
            utils.check_week_day(*[int(x) for x in anchor_str[1:].split("-")])
            fmt = "W%W-%w"
        elif re.fullmatch(r"W\d{1,2}", anchor_str):
            # "Www": no weekday given, so weekday 1 is appended to make the string
            # parseable with the week + weekday format.
            utils.check_week_day(int(anchor_str[1:]))
            fmt = "W%W-%w"
            anchor_str += "-1"
        elif anchor_str.lower() in utils.get_month_names():
            # Month name/abbreviation: replaced by the value that
            # utils.get_month_names() maps it to, then parsed as "MM".
            anchor_str = str(utils.get_month_names()[anchor_str.lower()])
            fmt = "%m"
        else:
            raise ValueError(
                f"Anchor input '{anchor_str}' does not match expected format."
            )
        return anchor_str, fmt

    def _append(self, interval):
        """Append a target/precursor interval to the matching list of the calendar."""
        if interval.is_target:
            self.targets.append(interval)
        else:
            self.precursors.append(interval)

    def _map_year(self, year: int) -> pd.Series:
        """Return the concrete intervals for the given year.

        Since our calendars are used to study periodic events, they are first
        instantiated without specific year(s). This method adds a specific year
        to the calendar and returns the intervals, mapping the
        Calendar to the given year.

        Intended for internal use, in conjunction with map_years or map_to_data.

        Args:
            year: The year for which the Calendar will be realized

        Returns:
            Pandas Series (named after the year, index named "i_interval") holding
            the pd.Interval objects of the calendar. The targets come first, in
            reverse order of definition, followed by the precursors in order of
            definition.
        """
        intervals_target = self._concatenate_periods(year, self.targets, True)
        intervals_precursor = self._concatenate_periods(year, self.precursors, False)

        # Precursors are generated walking away from the anchor, so reversing them
        # and appending the targets gives the order: furthest precursor ... last
        # target. The Series then stores this list reversed.
        year_intervals = intervals_precursor[::-1] + intervals_target

        year_intervals = pd.Series(year_intervals[::-1], name=year)
        year_intervals.index.name = "i_interval"
        return year_intervals

    def _concatenate_periods(self, year, list_periods, is_target):
        """Generate left-closed pd.Interval objects from a list of building blocks.

        Args:
            year: Year for which the anchor date is generated.
            list_periods: Blocks exposing `gap_dateoffset` and `length_dateoffset`.
            is_target: If True, the intervals are built forward in time starting at
                the anchor. If False, they are built backward in time from it.

        Returns:
            List of pd.Interval, in the same order as list_periods.
        """
        intervals = []
        if is_target:
            # build from left to right
            left_date = self._get_anchor(year)
            for block in list_periods:
                left_date += block.gap_dateoffset
                right_date = left_date + block.length_dateoffset
                intervals.append(pd.Interval(left_date, right_date, closed="left"))
                # the next block starts where this one ended
                left_date = right_date
        else:
            # build from right to left
            right_date = self._get_anchor(year)
            for block in list_periods:
                right_date -= block.gap_dateoffset
                left_date = right_date - block.length_dateoffset
                intervals.append(pd.Interval(left_date, right_date, closed="left"))
                # the next block ends where this one started
                right_date = left_date
        return intervals

    def _get_skip_nyears(self) -> int:
        """Determine how many years need to be skipped to avoid overlapping data.

        Required to prevent information leakage between anchor years.

        Returns:
            int: Number of years that need to be skipped. Always 0 if allow_overlap
                is True.
        """
        if self._allow_overlap:
            return 0

        # The computation is done for a fixed prototype year.
        proto_year = 2000
        skip_years = 0

        # Start of the calendar: the anchor minus all precursor gaps and lengths.
        start_calendar = self._get_anchor(proto_year)
        for prec in self.precursors:
            start_calendar -= prec.gap_dateoffset
            start_calendar -= prec.length_dateoffset

        while True:
            # End of an earlier calendar: its anchor plus all target gaps/lengths.
            prev_end_calendar = self._get_anchor(proto_year - 1 - skip_years)
            for target in self.targets:
                prev_end_calendar += target.gap_dateoffset
                prev_end_calendar += target.length_dateoffset

            if prev_end_calendar > start_calendar:
                skip_years += 1
            else:
                break

        return skip_years

    def map_years(self, start: int, end: int):
        """Add a start and end year mapping to the calendar.

        If the start and end years are the same, the intervals for only that single
        year are returned by calendar.get_intervals().

        Args:
            start: The first year for which the calendar will be realized
            end: The last year for which the calendar will be realized

        Returns:
            The calendar mapped to the input start and end year.

        Raises:
            ValueError: If start is greater than end.
        """
        if start > end:
            raise ValueError("The start year cannot be greater than the end year")

        self._first_year = start
        self._last_year = end
        self._mapping = "years"
        # A years-mapping replaces any earlier data-mapping.
        self._first_timestamp = None
        self._last_timestamp = None
        return self

    def map_to_data(
        self,
        input_data: Union[pd.Series, pd.DataFrame, xr.Dataset, xr.DataArray],
    ):
        """Map the calendar to input data period.

        Stores the first and last timestamps of the input data to the calendar, so
        that the intervals can cover the data to the greatest extent.

        Args:
            input_data: Input data for datetime mapping. Its index must be either
                pandas.DatetimeIndex, or an xarray `time` coordinate with datetime
                data.

        Returns:
            The calendar mapped to the input data period.
        """
        utils.check_timeseries(input_data)

        # min/max are used, so the order of the input data does not matter
        if isinstance(input_data, (pd.Series, pd.DataFrame)):
            self._first_timestamp = input_data.index.min()
            self._last_timestamp = input_data.index.max()
        else:
            self._first_timestamp = pd.Timestamp(input_data.time.min().values)
            self._last_timestamp = pd.Timestamp(input_data.time.max().values)

        self._mapping = "data"
        # A data-mapping replaces any earlier years-mapping; the year range is
        # derived from the timestamps when the intervals are requested.
        self._first_year = None
        self._last_year = None
        return self

    def _set_year_range_from_timestamps(self):
        """Derive the first and last anchor year from the stored timestamps.

        Returns:
            The calendar itself.

        Raises:
            ValueError: If no anchor year satisfies both checks below.
        """
        min_year = self._first_timestamp.year  # type: ignore
        max_year = self._last_timestamp.year  # type: ignore

        # Last date check: move back while the first entry of the mapped year
        # ends after the last timestamp.
        while self._map_year(max_year).iloc[0].right > self._last_timestamp:
            max_year -= 1
        # First date check: move forward while the last entry of the mapped year
        # ends at or before the first timestamp.
        while self._map_year(min_year).iloc[-1].right <= self._first_timestamp:
            min_year += 1

        if max_year < min_year:
            raise ValueError(
                "The input data could not cover the target advent calendar."
            )

        self._first_year = min_year
        self._last_year = max_year
        return self

    def _set_mapping(self, mapping):
        """Configure the mapping from a constructor-style mapping tuple.

        Args:
            mapping: None, ("years", first_year, last_year) or
                ("data", first_timestamp, last_timestamp).

        Raises:
            ValueError: If the first element is neither "years" nor "data".
        """
        if mapping is None:
            self._mapping = None
        elif mapping[0] == "years":
            self.map_years(mapping[1], mapping[2])
        elif mapping[0] == "data":
            self._mapping = "data"
            self._first_timestamp = mapping[1]
            self._last_timestamp = mapping[2]
        else:
            raise ValueError(
                "Unknown mapping passed to calendar. Valid options are "
                "either 'years' or 'data'."
            )

    def _rename_intervals(self, intervals: pd.DataFrame) -> pd.DataFrame:
        """Relabel the interval columns relative to the anchor.

        Columns 0 .. n_targets - 1 (the targets) are renamed to
        n_targets .. 1, the remaining columns (the precursors) to -1, -2, ...

        Args:
            intervals (pd.Dataframe): Dataframe with intervals.

        Returns:
            pd.Dataframe: Dataframe with relabelled columns, sorted by their
                i_interval value.
        """
        # rename precursors
        intervals = intervals.rename(
            columns={
                i: self.n_targets - i - 1
                for i in range(self.n_targets, len(intervals.columns))
            }
        )

        # rename targets
        intervals = intervals.rename(
            columns={i: self.n_targets - i for i in range(self.n_targets)}
        )

        return intervals.sort_index(axis=1)

    def get_intervals(self) -> pd.DataFrame:
        """Retrieve updated intervals from the Calendar object.

        Returns:
            Dataframe with one row per anchor year (index "anchor_year", sorted in
            descending order) and one column per interval.

        Raises:
            ValueError: If the calendar has not been mapped yet.
        """
        if self._mapping is None:
            raise ValueError(
                "Cannot retrieve intervals without map_years or "
                "map_to_data having configured the calendar."
            )
        if self._mapping == "data":
            self._set_year_range_from_timestamps()

        # Walk from the last year down to the first, skipping years where needed
        # to avoid overlap between anchor years.
        year_range = range(
            self._last_year,  # type: ignore
            self._first_year - 1,  # type: ignore
            -(self._get_skip_nyears() + 1),  # type: ignore
        )

        intervals = pd.concat([self._map_year(year) for year in year_range], axis=1).T

        intervals = self._rename_intervals(intervals)
        intervals.index.name = "anchor_year"
        return intervals.sort_index(axis=0, ascending=False)

    def show(self) -> pd.DataFrame:
        """Display the intervals the Calendar will generate for the current setup.

        Returns:
            pd.Dataframe: Dataframe containing the calendar intervals as strings,
                with any " 00:00:00" time component removed.
        """
        df = self.get_intervals()
        df = df.astype("str")
        for i in range(df.shape[0]):
            df.iloc[i] = [el.replace(" 00:00:00", "") for el in df.iloc[i].values]
        return df

    def __repr__(self) -> str:
        """Return a string representation of the Calendar."""
        intervals = self.targets + self.precursors
        if not intervals:
            intervals_str = repr(None)
        else:
            intervals_str = (
                f"[{linesep}\t\t"
                + f",{linesep}\t\t".join([repr(iv) for iv in intervals])
                + f"{linesep}\t]"
            )

        if self._mapping == "years":
            mapping = ("years", self._first_year, self._last_year)
        elif self._mapping == "data":
            mapping = ("data", self._first_timestamp, self._last_timestamp)
        else:
            mapping = None

        props = [
            ("anchor", repr(self.anchor)),
            ("allow_overlap", repr(self.allow_overlap)),
            ("mapping", repr(mapping)),
            ("intervals", intervals_str),
        ]

        propstr = f"{linesep}\t" + f",{linesep}\t".join([f"{k}={v}" for k, v in props])
        return f"{self.__class__.__name__}({propstr}{linesep})".replace("\t", " ")

    def visualize(  # noqa: PLR0913 (too-many-arguments)
        self,
        n_years: int = 3,
        interactive: bool = False,
        relative_dates: bool = False,
        show_length: bool = False,
        add_legend: bool = True,
        ax=None,
        **bokeh_kwargs,
    ) -> None:
        """Plot a visualization of the current calendar setup, to aid in user setup.

        Note: The interactive visualization requires the `bokeh` package to be installed
        in the active Python environment.

        Args:
            n_years: Sets the maximum number of anchor years that should be shown. By
                default only the most recent 3 are visualized, to ensure that they
                fit within the plot.
            interactive: If False, matplotlib will be used for the visualization. If
                True, bokeh will be used.
            relative_dates: Toggles if the intervals should be displayed relative to the
                anchor date, or as absolute dates.
            show_length: Toggles if the frequency of the intervals should be displayed.
                Defaults to False (Matplotlib plotter only).
            add_legend: Toggles if a legend should be added to the plot (Matplotlib
                only)
            ax: Matplotlib axis object to plot the visualization into.
            **bokeh_kwargs: Keyword arguments to pass to Bokeh's plotting.figure. See
                https://docs.bokeh.org/en/latest/docs/reference/plotting/figure.html
                for a list of possible keyword arguments.
        """
        # Work on a copy so that the temporary mapping below does not leak into
        # the user's calendar.
        calendar = copy.deepcopy(self)
        if calendar._mapping is None:  # pylint: disable=protected-access
            # Without a mapping there are no real dates to show: map to a single
            # placeholder year and fall back to relative dates.
            calendar.map_years(2000, 2000)
            if not relative_dates:
                print(
                    "Setting relative_dates=True. To see absolute dates, first call "
                    "calendar.map_years or calendar.map_to_data"
                )
                relative_dates = True
            add_yticklabels = False
        else:
            add_yticklabels = True

        # Clamp n_years between 1 and the number of available anchor years.
        n_years = max(n_years, 1)
        n_years = min(n_years, len(calendar.get_intervals().index))

        if interactive:
            utils.assert_bokeh_available()
            # pylint: disable=import-outside-toplevel
            from ._bokeh_plots import bokeh_visualization

            if ax is not None:
                warnings.warn(
                    "\n ax is only a valid keyword argument for the non-interactive"
                    "\n matplotlib backend. Bokeh's figure can be controlled by"
                    "\n passing Bokeh figure keyword arguments (e.g. width=800).",
                    UserWarning,
                    stacklevel=1,
                )

            bokeh_visualization(
                calendar, n_years, relative_dates, add_yticklabels, **bokeh_kwargs
            )
        else:
            if bokeh_kwargs:
                warnings.warn(
                    "\n kwargs for bokeh have been passed to visualize(), but the"
                    "\n matplotlib backend does not support these. Use the 'ax' kwarg"
                    "\n instead to control the generated figure.",
                    UserWarning,
                    stacklevel=1,
                )
            _plot.matplotlib_visualization(
                calendar,
                n_years,
                relative_dates,
                show_length,
                add_legend,
                add_yticklabels,
                ax=ax,
            )

    @property
    def flat(self) -> pd.DataFrame:
        """Return the flattened intervals (the stacked result of get_intervals)."""
        return self.get_intervals().stack()  # type: ignore