diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst index 9b2753ca02495..fb262af1e63f6 100644 --- a/doc/source/reference/offset_frequency.rst +++ b/doc/source/reference/offset_frequency.rst @@ -25,6 +25,7 @@ Properties DateOffset.nanos DateOffset.normalize DateOffset.rule_code + DateOffset.n Methods ~~~~~~~ @@ -57,6 +58,7 @@ Properties BusinessDay.nanos BusinessDay.normalize BusinessDay.rule_code + BusinessDay.n Methods ~~~~~~~ @@ -90,6 +92,7 @@ Properties BusinessHour.nanos BusinessHour.normalize BusinessHour.rule_code + BusinessHour.n Methods ~~~~~~~ @@ -122,6 +125,7 @@ Properties CustomBusinessDay.nanos CustomBusinessDay.normalize CustomBusinessDay.rule_code + CustomBusinessDay.n Methods ~~~~~~~ @@ -154,6 +158,7 @@ Properties CustomBusinessHour.nanos CustomBusinessHour.normalize CustomBusinessHour.rule_code + CustomBusinessHour.n Methods ~~~~~~~ @@ -186,6 +191,7 @@ Properties MonthOffset.nanos MonthOffset.normalize MonthOffset.rule_code + MonthOffset.n Methods ~~~~~~~ @@ -219,6 +225,7 @@ Properties MonthEnd.nanos MonthEnd.normalize MonthEnd.rule_code + MonthEnd.n Methods ~~~~~~~ @@ -252,6 +259,7 @@ Properties MonthBegin.nanos MonthBegin.normalize MonthBegin.rule_code + MonthBegin.n Methods ~~~~~~~ @@ -285,6 +293,7 @@ Properties BusinessMonthEnd.nanos BusinessMonthEnd.normalize BusinessMonthEnd.rule_code + BusinessMonthEnd.n Methods ~~~~~~~ @@ -318,6 +327,7 @@ Properties BusinessMonthBegin.nanos BusinessMonthBegin.normalize BusinessMonthBegin.rule_code + BusinessMonthBegin.n Methods ~~~~~~~ @@ -352,6 +362,7 @@ Properties CustomBusinessMonthEnd.nanos CustomBusinessMonthEnd.normalize CustomBusinessMonthEnd.rule_code + CustomBusinessMonthEnd.n Methods ~~~~~~~ @@ -385,6 +396,7 @@ Properties CustomBusinessMonthBegin.nanos CustomBusinessMonthBegin.normalize CustomBusinessMonthBegin.rule_code + CustomBusinessMonthBegin.n Methods ~~~~~~~ @@ -417,6 +429,7 @@ Properties SemiMonthOffset.nanos SemiMonthOffset.normalize SemiMonthOffset.rule_code + SemiMonthOffset.n Methods ~~~~~~~ @@ -450,6 +463,7 @@ Properties SemiMonthEnd.nanos SemiMonthEnd.normalize SemiMonthEnd.rule_code + SemiMonthEnd.n Methods ~~~~~~~ @@ -483,6 +497,7 @@ Properties SemiMonthBegin.nanos SemiMonthBegin.normalize SemiMonthBegin.rule_code + SemiMonthBegin.n Methods ~~~~~~~ @@ -516,6 +531,7 @@ Properties Week.nanos Week.normalize Week.rule_code + Week.n Methods ~~~~~~~ @@ -549,6 +565,7 @@ Properties WeekOfMonth.nanos WeekOfMonth.normalize WeekOfMonth.rule_code + WeekOfMonth.n Methods ~~~~~~~ @@ -581,6 +598,7 @@ Properties LastWeekOfMonth.nanos LastWeekOfMonth.normalize LastWeekOfMonth.rule_code + LastWeekOfMonth.n Methods ~~~~~~~ @@ -613,6 +631,7 @@ Properties QuarterOffset.nanos QuarterOffset.normalize QuarterOffset.rule_code + QuarterOffset.n Methods ~~~~~~~ @@ -646,6 +665,7 @@ Properties BQuarterEnd.nanos BQuarterEnd.normalize BQuarterEnd.rule_code + BQuarterEnd.n Methods ~~~~~~~ @@ -679,6 +699,7 @@ Properties BQuarterBegin.nanos BQuarterBegin.normalize BQuarterBegin.rule_code + BQuarterBegin.n Methods ~~~~~~~ @@ -712,6 +733,7 @@ Properties QuarterEnd.nanos QuarterEnd.normalize QuarterEnd.rule_code + QuarterEnd.n Methods ~~~~~~~ @@ -745,6 +767,7 @@ Properties QuarterBegin.nanos QuarterBegin.normalize QuarterBegin.rule_code + QuarterBegin.n Methods ~~~~~~~ @@ -778,6 +801,7 @@ Properties YearOffset.nanos YearOffset.normalize YearOffset.rule_code + YearOffset.n Methods ~~~~~~~ @@ -811,6 +835,7 @@ Properties BYearEnd.nanos BYearEnd.normalize BYearEnd.rule_code + BYearEnd.n Methods ~~~~~~~ @@ -844,6 +869,7 @@ Properties BYearBegin.nanos BYearBegin.normalize BYearBegin.rule_code + BYearBegin.n Methods ~~~~~~~ @@ -877,6 +903,7 @@ Properties YearEnd.nanos YearEnd.normalize YearEnd.rule_code + YearEnd.n Methods ~~~~~~~ @@ -910,6 +937,7 @@ Properties YearBegin.nanos YearBegin.normalize YearBegin.rule_code + YearBegin.n Methods ~~~~~~~ @@ -943,6 +971,7 @@ Properties FY5253.nanos FY5253.normalize FY5253.rule_code + FY5253.n Methods ~~~~~~~ @@ -977,6 +1006,7 @@ Properties FY5253Quarter.nanos FY5253Quarter.normalize FY5253Quarter.rule_code + FY5253Quarter.n Methods ~~~~~~~ @@ -1011,6 +1041,7 @@ Properties Easter.nanos Easter.normalize Easter.rule_code + Easter.n Methods ~~~~~~~ @@ -1317,6 +1348,7 @@ Properties BDay.normalize BDay.offset BDay.rule_code + BDay.n Methods ~~~~~~~ @@ -1353,6 +1385,7 @@ Properties BMonthEnd.nanos BMonthEnd.normalize BMonthEnd.rule_code + BMonthEnd.n Methods ~~~~~~~ @@ -1389,6 +1422,7 @@ Properties BMonthBegin.nanos BMonthBegin.normalize BMonthBegin.rule_code + BMonthBegin.n Methods ~~~~~~~ @@ -1429,6 +1463,7 @@ Properties CBMonthEnd.normalize CBMonthEnd.offset CBMonthEnd.rule_code + CBMonthEnd.n Methods ~~~~~~~ @@ -1469,6 +1504,7 @@ Properties CBMonthBegin.normalize CBMonthBegin.offset CBMonthBegin.rule_code + CBMonthBegin.n Methods ~~~~~~~ @@ -1506,6 +1542,7 @@ Properties CDay.normalize CDay.offset CDay.rule_code + CDay.n Methods ~~~~~~~ diff --git a/pandas/_libs/tslibs/base.pxd b/pandas/_libs/tslibs/base.pxd index d32413c3bad86..0521279025d4f 100644 --- a/pandas/_libs/tslibs/base.pxd +++ b/pandas/_libs/tslibs/base.pxd @@ -8,13 +8,8 @@ cdef class ABCTimestamp(datetime): pass -cdef class ABCTick: - pass - - cdef class ABCPeriod: pass -cdef bint is_tick_object(object obj) cdef bint is_period_object(object obj) diff --git a/pandas/_libs/tslibs/base.pyx b/pandas/_libs/tslibs/base.pyx index 59daba4510b6e..91178fe3933f7 100644 --- a/pandas/_libs/tslibs/base.pyx +++ b/pandas/_libs/tslibs/base.pyx @@ -20,13 +20,5 @@ cdef class ABCPeriod: pass -cdef class ABCTick: - pass - - -cdef bint is_tick_object(object obj): - return isinstance(obj, ABCTick) - - cdef bint is_period_object(object obj): return isinstance(obj, ABCPeriod) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd index c6afb557ba2ef..e75cd8bdf1baf 100644 --- a/pandas/_libs/tslibs/offsets.pxd +++ b/pandas/_libs/tslibs/offsets.pxd @@ -1,2 +1,3 @@ cdef to_offset(object obj) cdef bint is_offset_object(object obj) +cdef bint is_tick_object(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index c113897e4fe82..b25558e8572fe 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -21,7 +21,7 @@ cnp.import_array() from pandas._libs.tslibs cimport util from pandas._libs.tslibs.util cimport is_integer_object, is_datetime64_object -from pandas._libs.tslibs.base cimport ABCTick, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.ccalendar import MONTHS, DAYS, weekday_to_int, int_to_weekday from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek @@ -35,13 +35,9 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single -from pandas._libs.tslibs.timedeltas import Timedelta -from pandas._libs.tslibs.timestamps import Timestamp - # --------------------------------------------------------------------- # Constants - _offset_to_period_map = { 'WEEKDAY': 'D', 'EOM': 'M', @@ -94,6 +90,10 @@ cdef bint is_offset_object(object obj): return isinstance(obj, _BaseOffset) +cdef bint is_tick_object(object obj): + return isinstance(obj, _Tick) + + cdef to_offset(object obj): """ Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime @@ -157,6 +157,8 @@ def apply_wraps(func): # not play nicely with cython class methods def wrapper(self, other): + from pandas import Timestamp + if other is NaT: return NaT elif isinstance(other, (timedelta, BaseOffset)): @@ -396,7 +398,7 @@ class ApplyTypeError(TypeError): # --------------------------------------------------------------------- # Base Classes -class _BaseOffset: +cdef class _BaseOffset: """ Base class for DateOffset methods that are not overridden by subclasses and will (after pickle errors are resolved) go into a cdef class. @@ -407,16 +409,17 @@ class _BaseOffset: _use_relativedelta = False _adjust_dst = True _deprecations = frozenset(["isAnchored", "onOffset"]) - normalize = False # default for prior pickles + + cdef readonly: + int64_t n + bint normalize + dict _cache def __init__(self, n=1, normalize=False): n = self._validate_n(n) - object.__setattr__(self, "n", n) - object.__setattr__(self, "normalize", normalize) - object.__setattr__(self, "_cache", {}) - - def __setattr__(self, name, value): - raise AttributeError("DateOffset objects are immutable.") + self.n = n + self.normalize = normalize + self._cache = {} def __eq__(self, other: Any) -> bool: if isinstance(other, str): @@ -447,6 +450,8 @@ class _BaseOffset: """ # NB: non-cython subclasses override property with cache_readonly all_paras = self.__dict__.copy() + all_paras["n"] = self.n + all_paras["normalize"] = self.normalize if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') exclude = ['kwds', 'name', 'calendar'] @@ -613,6 +618,7 @@ class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ + from pandas import Timestamp dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) @@ -627,6 +633,7 @@ class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ + from pandas import Timestamp dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) @@ -696,9 +703,9 @@ class _BaseOffset: kwds = {key: odict[key] for key in odict if odict[key]} state.update(kwds) - if '_cache' not in state: - state['_cache'] = {} - + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = state.pop("_cache", {}) self.__dict__.update(state) if 'weekmask' in state and 'holidays' in state: @@ -711,6 +718,8 @@ class _BaseOffset: def __getstate__(self): """Return a pickleable state""" state = self.__dict__.copy() + state["n"] = self.n + state["normalize"] = self.normalize # we don't want to actually pickle the calendar object # as its a np.busyday; we recreate on deserialization @@ -761,7 +770,7 @@ class BaseOffset(_BaseOffset): return (-self).__add__(other) -cdef class _Tick(ABCTick): +cdef class _Tick(_BaseOffset): """ dummy class to mix into tseries.offsets.Tick so that in tslibs.period we can do isinstance checks on _Tick and avoid importing tseries.offsets @@ -770,17 +779,11 @@ cdef class _Tick(ABCTick): # ensure that reversed-ops with numpy scalars return NotImplemented __array_priority__ = 1000 _adjust_dst = False - _inc = Timedelta(microseconds=1000) _prefix = "undefined" _attributes = frozenset(["n", "normalize"]) - cdef readonly: - int64_t n - bint normalize - dict _cache - def __init__(self, n=1, normalize=False): - n = _BaseOffset._validate_n(n) + n = self._validate_n(n) self.n = n self.normalize = False self._cache = {} @@ -791,7 +794,7 @@ cdef class _Tick(ABCTick): ) @property - def delta(self) -> Timedelta: + def delta(self): return self.n * self._inc @property diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 8af467c3b0950..380231129c9b2 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -37,7 +37,7 @@ cdef extern from "src/datetime/np_datetime.h": cimport pandas._libs.tslibs.util as util -from pandas._libs.tslibs.base cimport ABCPeriod, is_period_object, is_tick_object +from pandas._libs.tslibs.base cimport ABCPeriod, is_period_object from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info @@ -68,7 +68,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) -from pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.offsets cimport to_offset, is_tick_object from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7a3af169a960e..c336e5f990f9a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -21,7 +21,7 @@ from pandas._libs.tslibs.util cimport ( is_float_object, is_array ) -from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp from pandas._libs.tslibs.ccalendar cimport DAY_NANOS @@ -34,6 +34,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.offsets cimport is_tick_object # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 4f8b85240c79f..d4a1a52019503 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -25,7 +25,7 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, is_array, ) -from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp from pandas._libs.tslibs cimport ccalendar @@ -40,7 +40,7 @@ from pandas._libs.tslibs.np_datetime cimport ( cmp_scalar, ) from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.offsets cimport to_offset, is_tick_object from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timezones cimport ( is_utc, maybe_get_tz, treat_tz_as_pytz, utc_pytz as UTC, diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index cd2ded874c08c..8a2626f9a7e68 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -2,14 +2,16 @@ Support pre-0.12 series pickle compatibility. """ +import contextlib import copy +import io import pickle as pkl from typing import TYPE_CHECKING, Optional import warnings from pandas import Index -from pandas.tseries.offsets import Tick +from pandas.tseries.offsets import DateOffset if TYPE_CHECKING: from pandas import Series, DataFrame @@ -40,7 +42,7 @@ def load_reduce(self): return except TypeError: pass - elif args and issubclass(args[0], Tick): + elif args and issubclass(args[0], DateOffset): # TypeError: object.__new__(Day) is not safe, use Day.__new__() cls = args[0] stack[-1] = cls.__new__(*args) @@ -247,3 +249,32 @@ def load(fh, encoding: Optional[str] = None, is_verbose: bool = False): return up.load() except (ValueError, TypeError): raise + + +def loads( + bytes_object: bytes, + *, + fix_imports: bool = True, + encoding: str = "ASCII", + errors: str = "strict", +): + """ + Analogous to pickle._loads. + """ + fd = io.BytesIO(bytes_object) + return Unpickler( + fd, fix_imports=fix_imports, encoding=encoding, errors=errors + ).load() + + +@contextlib.contextmanager +def patch_pickle(): + """ + Temporarily patch pickle to use our unpickler. + """ + orig_loads = pkl.loads + try: + pkl.loads = loads + yield + finally: + pkl.loads = orig_loads diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 85fcfd107b121..36cd61b6c3adb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2,7 +2,6 @@ High level interface to PyTables for reading and writing pandas data structures to disk """ - import copy from datetime import date, tzinfo import itertools @@ -19,6 +18,7 @@ from pandas._libs.tslibs import timezones from pandas._typing import ArrayLike, FrameOrSeries, Label from pandas.compat._optional import import_optional_dependency +from pandas.compat.pickle_compat import patch_pickle from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -729,10 +729,13 @@ def get(self, key: str): object Same type as object stored in file. """ - group = self.get_node(key) - if group is None: - raise KeyError(f"No object named {key} in the file") - return self._read_group(group) + with patch_pickle(): + # GH#31167 Without this patch, pickle doesn't know how to unpickle + # old DateOffset objects now that they are cdef classes. + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + return self._read_group(group) def select( self, diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 88f77a8d7f054..364a50be5c291 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -185,8 +185,6 @@ def __add__(date): Timestamp('2017-03-01 09:10:11') """ - _params = cache_readonly(BaseOffset._params.fget) - freqstr = cache_readonly(BaseOffset.freqstr.fget) _attributes = frozenset(["n", "normalize"] + list(liboffsets.relativedelta_kwds)) _adjust_dst = False @@ -295,11 +293,18 @@ def is_on_offset(self, dt): # TODO, see #1395 return True + @cache_readonly + def _params(self): + # TODO: see if we can just write cache_readonly(BaseOffset._params.__get__) + return BaseOffset._params.__get__(self) -class SingleConstructorMixin: - _params = cache_readonly(BaseOffset._params.fget) - freqstr = cache_readonly(BaseOffset.freqstr.fget) + @cache_readonly + def freqstr(self): + # TODO: see if we can just write cache_readonly(BaseOffset.freqstr.__get__) + return BaseOffset.freqstr.__get__(self) + +class SingleConstructorMixin: @classmethod def _from_name(cls, suffix=None): # default _from_name calls cls with no args @@ -307,6 +312,16 @@ def _from_name(cls, suffix=None): raise ValueError(f"Bad freq suffix {suffix}") return cls() + @cache_readonly + def _params(self): + # TODO: see if we can just write cache_readonly(BaseOffset._params.__get__) + return BaseOffset._params.__get__(self) + + @cache_readonly + def freqstr(self): + # TODO: see if we can just write cache_readonly(BaseOffset.freqstr.__get__) + return BaseOffset.freqstr.__get__(self) + class SingleConstructorOffset(SingleConstructorMixin, BaseOffset): pass @@ -320,6 +335,10 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _prefix = "B" _attributes = frozenset(["n", "normalize", "offset"]) + def __reduce__(self): + tup = (self.n, self.normalize, self.offset) + return type(self), tup + def _offset_str(self) -> str: def get_str(td): off_str = "" @@ -710,6 +729,12 @@ class CustomBusinessDay(CustomMixin, BusinessDay): ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] ) + def __reduce__(self): + # np.holidaycalendar cant be pickled, so pass None there and + # it will be re-constructed within __init__ + tup = (self.n, self.normalize, self.weekmask, self.holidays, None, self.offset) + return type(self), tup + def __init__( self, n=1, @@ -2104,6 +2129,8 @@ def is_on_offset(self, dt: datetime) -> bool: class Tick(liboffsets._Tick, SingleConstructorOffset): + _inc = Timedelta(microseconds=1000) + def __add__(self, other): if isinstance(other, Tick): if type(self) == type(other):