From 55afda4d0d13b512f83f49587266d069626d81cd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 16:09:13 -0700 Subject: [PATCH 01/10] REF: make BaseOffset a cdef class --- pandas/_libs/tslibs/base.pxd | 5 -- pandas/_libs/tslibs/base.pyx | 8 --- pandas/_libs/tslibs/offsets.pxd | 1 + pandas/_libs/tslibs/offsets.pyx | 62 +++++++++++++------ pandas/_libs/tslibs/period.pyx | 4 +- pandas/_libs/tslibs/timedeltas.pyx | 3 +- pandas/_libs/tslibs/timestamps.pyx | 4 +- pandas/compat/pickle_compat.py | 4 +- .../tests/scalar/timestamp/test_arithmetic.py | 2 +- pandas/tseries/offsets.py | 27 ++++---- 10 files changed, 70 insertions(+), 50 deletions(-) diff --git a/pandas/_libs/tslibs/base.pxd b/pandas/_libs/tslibs/base.pxd index d32413c3bad86..0521279025d4f 100644 --- a/pandas/_libs/tslibs/base.pxd +++ b/pandas/_libs/tslibs/base.pxd @@ -8,13 +8,8 @@ cdef class ABCTimestamp(datetime): pass -cdef class ABCTick: - pass - - cdef class ABCPeriod: pass -cdef bint is_tick_object(object obj) cdef bint is_period_object(object obj) diff --git a/pandas/_libs/tslibs/base.pyx b/pandas/_libs/tslibs/base.pyx index 59daba4510b6e..91178fe3933f7 100644 --- a/pandas/_libs/tslibs/base.pyx +++ b/pandas/_libs/tslibs/base.pyx @@ -20,13 +20,5 @@ cdef class ABCPeriod: pass -cdef class ABCTick: - pass - - -cdef bint is_tick_object(object obj): - return isinstance(obj, ABCTick) - - cdef bint is_period_object(object obj): return isinstance(obj, ABCPeriod) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd index c6afb557ba2ef..e75cd8bdf1baf 100644 --- a/pandas/_libs/tslibs/offsets.pxd +++ b/pandas/_libs/tslibs/offsets.pxd @@ -1,2 +1,3 @@ cdef to_offset(object obj) cdef bint is_offset_object(object obj) +cdef bint is_tick_object(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 5efb9b3534f14..8618a1fc1d770 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -21,7 +21,7 @@ cnp.import_array() from pandas._libs.tslibs cimport util from pandas._libs.tslibs.util cimport is_integer_object -from pandas._libs.tslibs.base cimport ABCTick, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.ccalendar import MONTHS, DAYS from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek @@ -35,12 +35,9 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single -from pandas._libs.tslibs.timestamps import Timestamp - # --------------------------------------------------------------------- # Constants - _offset_to_period_map = { 'WEEKDAY': 'D', 'EOM': 'M', @@ -93,6 +90,10 @@ cdef bint is_offset_object(object obj): return isinstance(obj, _BaseOffset) +cdef bint is_tick_object(object obj): + return isinstance(obj, _Tick) + + cdef to_offset(object obj): """ Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime @@ -156,6 +157,8 @@ def apply_wraps(func): # not play nicely with cython class methods def wrapper(self, other): + # TODO: try to avoid runtime/circular import + from pandas import Timestamp if other is NaT: return NaT elif isinstance(other, (timedelta, BaseOffset)): @@ -395,7 +398,7 @@ class ApplyTypeError(TypeError): # --------------------------------------------------------------------- # Base Classes -class _BaseOffset: +cdef class _BaseOffset: """ Base class for DateOffset methods that are not overridden by subclasses and will (after pickle errors are resolved) go into a cdef class. @@ -406,16 +409,17 @@ class _BaseOffset: _use_relativedelta = False _adjust_dst = True _deprecations = frozenset(["isAnchored", "onOffset"]) - normalize = False # default for prior pickles + + cdef readonly: + int64_t n + bint normalize + dict _cache def __init__(self, n=1, normalize=False): n = self._validate_n(n) - object.__setattr__(self, "n", n) - object.__setattr__(self, "normalize", normalize) - object.__setattr__(self, "_cache", {}) - - def __setattr__(self, name, value): - raise AttributeError("DateOffset objects are immutable.") + self.n = n + self.normalize = normalize + self._cache = {} def __eq__(self, other: Any) -> bool: if isinstance(other, str): @@ -446,6 +450,8 @@ class _BaseOffset: """ # NB: non-cython subclasses override property with cache_readonly all_paras = self.__dict__.copy() + all_paras["n"] = self.n + all_paras["normalize"] = self.normalize if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') exclude = ['kwds', 'name', 'calendar'] @@ -606,6 +612,8 @@ class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ + # TODO: try to avoid runtime/circular import + from pandas import Timestamp dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) @@ -620,6 +628,8 @@ class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ + # TODO: try to avoid runtime/circular import + from pandas import Timestamp dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) @@ -643,7 +653,8 @@ class _BaseOffset: # ------------------------------------------------------------------ - def _validate_n(self, n): + @staticmethod + def _validate_n(n): """ Require that `n` be an integer. @@ -686,9 +697,9 @@ class _BaseOffset: kwds = {key: odict[key] for key in odict if odict[key]} state.update(kwds) - if '_cache' not in state: - state['_cache'] = {} - + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = state.pop("_cache", {}) self.__dict__.update(state) if 'weekmask' in state and 'holidays' in state: @@ -701,6 +712,8 @@ class _BaseOffset: def __getstate__(self): """Return a pickleable state""" state = self.__dict__.copy() + state["n"] = self.n + state["normalize"] = self.normalize # we don't want to actually pickle the calendar object # as its a np.busyday; we recreate on deserialization @@ -751,7 +764,7 @@ class BaseOffset(_BaseOffset): return (-self).__add__(other) -cdef class _Tick(ABCTick): +cdef class _Tick(_BaseOffset): """ dummy class to mix into tseries.offsets.Tick so that in tslibs.period we can do isinstance checks on _Tick and avoid importing tseries.offsets @@ -761,6 +774,18 @@ cdef class _Tick(ABCTick): __array_priority__ = 1000 _adjust_dst = False + def __init__(self, n=1, normalize=False): + n = _BaseOffset._validate_n(n) + self.n = n + self.normalize = normalize + self._cache = {} + + if normalize: + # GH#21427 + raise ValueError( + "Tick offset with `normalize=True` are not allowed." + ) + def is_on_offset(self, dt) -> bool: return True @@ -779,7 +804,8 @@ cdef class _Tick(ABCTick): return (type(self), (self.n,)) def __setstate__(self, state): - object.__setattr__(self, "n", state["n"]) + self.n = state["n"] + self.normalize = False class BusinessMixin: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c5be5b1d96469..a6aea912eac39 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -37,7 +37,7 @@ cdef extern from "src/datetime/np_datetime.h": cimport pandas._libs.tslibs.util as util -from pandas._libs.tslibs.base cimport ABCPeriod, is_period_object, is_tick_object +from pandas._libs.tslibs.base cimport ABCPeriod, is_period_object from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info @@ -68,7 +68,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) -from pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.offsets cimport to_offset, is_tick_object from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 03419a6267983..37c6e836c1140 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -21,7 +21,7 @@ from pandas._libs.tslibs.util cimport ( is_float_object, is_array ) -from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp from pandas._libs.tslibs.ccalendar cimport DAY_NANOS @@ -34,6 +34,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.offsets cimport is_tick_object # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index af98057378d7b..accb48027e8e0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -25,7 +25,7 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, is_array, ) -from pandas._libs.tslibs.base cimport ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs cimport ccalendar @@ -40,7 +40,7 @@ from pandas._libs.tslibs.np_datetime cimport ( cmp_scalar, ) from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.offsets cimport to_offset, is_tick_object from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timezones cimport ( is_utc, maybe_get_tz, treat_tz_as_pytz, utc_pytz as UTC, diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index cd2ded874c08c..989fe1cbb0f99 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -9,7 +9,7 @@ from pandas import Index -from pandas.tseries.offsets import Tick +from pandas.tseries.offsets import DateOffset if TYPE_CHECKING: from pandas import Series, DataFrame @@ -40,7 +40,7 @@ def load_reduce(self): return except TypeError: pass - elif args and issubclass(args[0], Tick): + elif args and issubclass(args[0], DateOffset): # TypeError: object.__new__(Day) is not safe, use Day.__new__() cls = args[0] stack[-1] = cls.__new__(*args) diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index b038ee1aee106..ed0045bcab989 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -52,7 +52,7 @@ def test_overflow_offset_raises(self): # used to crash, so check for proper overflow exception stamp = Timestamp("2000/1/1") - offset_overflow = to_offset("D") * 100 ** 25 + offset_overflow = to_offset("D") * 100 ** 5 with pytest.raises(OverflowError, match=msg): stamp + offset_overflow diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 4912dc0eb349e..9ee924dc419be 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -185,8 +185,9 @@ def __add__(date): Timestamp('2017-03-01 09:10:11') """ - _params = cache_readonly(BaseOffset._params.fget) - freqstr = cache_readonly(BaseOffset.freqstr.fget) + # FIXME: restore these as cache_readonly + # _params = cache_readonly(BaseOffset._params.__get__) + # freqstr = cache_readonly(BaseOffset.freqstr.__get__) _attributes = frozenset(["n", "normalize"] + list(liboffsets.relativedelta_kwds)) _adjust_dst = False @@ -297,8 +298,9 @@ def is_on_offset(self, dt): class SingleConstructorOffset(BaseOffset): - _params = cache_readonly(BaseOffset._params.fget) - freqstr = cache_readonly(BaseOffset.freqstr.fget) + # FIXME: restore these as cache_readonly + # _params = cache_readonly(BaseOffset._params.__get__) + # freqstr = cache_readonly(BaseOffset.freqstr.__get__) @classmethod def _from_name(cls, suffix=None): @@ -316,6 +318,10 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _prefix = "B" _attributes = frozenset(["n", "normalize", "offset"]) + def __reduce__(self): + tup = (self.n, self.normalize, self.offset) + return type(self), tup + def __init__(self, n=1, normalize=False, offset=timedelta(0)): BaseOffset.__init__(self, n, normalize) object.__setattr__(self, "_offset", offset) @@ -718,6 +724,12 @@ class CustomBusinessDay(CustomMixin, BusinessDay): ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] ) + def __reduce__(self): + # np.holidaycalendar cant be pickled, so pass None there and + # it will be re-constructed within __init__ + tup = (self.n, self.normalize, self.weekmask, self.holidays, None, self.offset) + return type(self), tup + def __init__( self, n=1, @@ -2151,13 +2163,6 @@ class Tick(liboffsets._Tick, SingleConstructorOffset): _prefix = "undefined" _attributes = frozenset(["n", "normalize"]) - def __init__(self, n=1, normalize=False): - BaseOffset.__init__(self, n, normalize) - if normalize: - raise ValueError( - "Tick offset with `normalize=True` are not allowed." - ) # GH#21427 - __gt__ = _tick_comp(operator.gt) __ge__ = _tick_comp(operator.ge) __lt__ = _tick_comp(operator.lt) From 24685a50f770576bd37350aad069331985967778 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 20:08:46 -0700 Subject: [PATCH 02/10] Pickle patch --- pandas/compat/pickle_compat.py | 23 +++++++++++++++++++++++ pandas/io/pytables.py | 11 ++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 989fe1cbb0f99..0e82dbdee0379 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -2,7 +2,9 @@ Support pre-0.12 series pickle compatibility. """ +import contextlib import copy +import io import pickle as pkl from typing import TYPE_CHECKING, Optional import warnings @@ -247,3 +249,24 @@ def load(fh, encoding: Optional[str] = None, is_verbose: bool = False): return up.load() except (ValueError, TypeError): raise + + +def loads(obj: bytes): + """ + Analogous to pickle._loads. + """ + fd = io.BytesIO(obj) + return Unpickler(fd).load() + + +@contextlib.contextmanager +def patch_pickle(): + """ + Temporarily patch pickle to use our unpickler. + """ + orig_loads = pkl.loads + try: + pkl.loads = loads + yield + finally: + pkl.loads = orig_loads diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 345402e619ff2..a40ab3f7c63d4 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2,7 +2,6 @@ High level interface to PyTables for reading and writing pandas data structures to disk """ - import copy from datetime import date, tzinfo import itertools @@ -19,6 +18,7 @@ from pandas._libs.tslibs import timezones from pandas._typing import ArrayLike, FrameOrSeries, Label from pandas.compat._optional import import_optional_dependency +from pandas.compat.pickle_compat import patch_pickle from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -729,10 +729,11 @@ def get(self, key: str): object Same type as object stored in file. """ - group = self.get_node(key) - if group is None: - raise KeyError(f"No object named {key} in the file") - return self._read_group(group) + with patch_pickle(): + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + return self._read_group(group) def select( self, From b5ce30f36df7b1fb9315ab168c596e7956626e0b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 20:18:05 -0700 Subject: [PATCH 03/10] update doc --- doc/source/reference/offset_frequency.rst | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst index 6240181708f97..fb262af1e63f6 100644 --- a/doc/source/reference/offset_frequency.rst +++ b/doc/source/reference/offset_frequency.rst @@ -25,6 +25,7 @@ Properties DateOffset.nanos DateOffset.normalize DateOffset.rule_code + DateOffset.n Methods ~~~~~~~ @@ -57,6 +58,7 @@ Properties BusinessDay.nanos BusinessDay.normalize BusinessDay.rule_code + BusinessDay.n Methods ~~~~~~~ @@ -90,6 +92,7 @@ Properties BusinessHour.nanos BusinessHour.normalize BusinessHour.rule_code + BusinessHour.n Methods ~~~~~~~ @@ -122,6 +125,7 @@ Properties CustomBusinessDay.nanos CustomBusinessDay.normalize CustomBusinessDay.rule_code + CustomBusinessDay.n Methods ~~~~~~~ @@ -154,6 +158,7 @@ Properties CustomBusinessHour.nanos CustomBusinessHour.normalize CustomBusinessHour.rule_code + CustomBusinessHour.n Methods ~~~~~~~ @@ -186,6 +191,7 @@ Properties MonthOffset.nanos MonthOffset.normalize MonthOffset.rule_code + MonthOffset.n Methods ~~~~~~~ @@ -219,6 +225,7 @@ Properties MonthEnd.nanos MonthEnd.normalize MonthEnd.rule_code + MonthEnd.n Methods ~~~~~~~ @@ -252,6 +259,7 @@ Properties MonthBegin.nanos MonthBegin.normalize MonthBegin.rule_code + MonthBegin.n Methods ~~~~~~~ @@ -285,6 +293,7 @@ Properties BusinessMonthEnd.nanos BusinessMonthEnd.normalize BusinessMonthEnd.rule_code + BusinessMonthEnd.n Methods ~~~~~~~ @@ -318,6 +327,7 @@ Properties BusinessMonthBegin.nanos BusinessMonthBegin.normalize BusinessMonthBegin.rule_code + BusinessMonthBegin.n Methods ~~~~~~~ @@ -352,6 +362,7 @@ Properties CustomBusinessMonthEnd.nanos CustomBusinessMonthEnd.normalize CustomBusinessMonthEnd.rule_code + CustomBusinessMonthEnd.n Methods ~~~~~~~ @@ -385,6 +396,7 @@ Properties CustomBusinessMonthBegin.nanos CustomBusinessMonthBegin.normalize CustomBusinessMonthBegin.rule_code + CustomBusinessMonthBegin.n Methods ~~~~~~~ @@ -417,6 +429,7 @@ Properties SemiMonthOffset.nanos SemiMonthOffset.normalize SemiMonthOffset.rule_code + SemiMonthOffset.n Methods ~~~~~~~ @@ -450,6 +463,7 @@ Properties SemiMonthEnd.nanos SemiMonthEnd.normalize SemiMonthEnd.rule_code + SemiMonthEnd.n Methods ~~~~~~~ @@ -483,6 +497,7 @@ Properties SemiMonthBegin.nanos SemiMonthBegin.normalize SemiMonthBegin.rule_code + SemiMonthBegin.n Methods ~~~~~~~ @@ -516,6 +531,7 @@ Properties Week.nanos Week.normalize Week.rule_code + Week.n Methods ~~~~~~~ @@ -549,6 +565,7 @@ Properties WeekOfMonth.nanos WeekOfMonth.normalize WeekOfMonth.rule_code + WeekOfMonth.n Methods ~~~~~~~ @@ -581,6 +598,7 @@ Properties LastWeekOfMonth.nanos LastWeekOfMonth.normalize LastWeekOfMonth.rule_code + LastWeekOfMonth.n Methods ~~~~~~~ @@ -613,6 +631,7 @@ Properties QuarterOffset.nanos QuarterOffset.normalize QuarterOffset.rule_code + QuarterOffset.n Methods ~~~~~~~ @@ -646,6 +665,7 @@ Properties BQuarterEnd.nanos BQuarterEnd.normalize BQuarterEnd.rule_code + BQuarterEnd.n Methods ~~~~~~~ @@ -679,6 +699,7 @@ Properties BQuarterBegin.nanos BQuarterBegin.normalize BQuarterBegin.rule_code + BQuarterBegin.n Methods ~~~~~~~ @@ -712,6 +733,7 @@ Properties QuarterEnd.nanos QuarterEnd.normalize QuarterEnd.rule_code + QuarterEnd.n Methods ~~~~~~~ @@ -745,6 +767,7 @@ Properties QuarterBegin.nanos QuarterBegin.normalize QuarterBegin.rule_code + QuarterBegin.n Methods ~~~~~~~ @@ -778,6 +801,7 @@ Properties YearOffset.nanos YearOffset.normalize YearOffset.rule_code + YearOffset.n Methods ~~~~~~~ @@ -811,6 +835,7 @@ Properties BYearEnd.nanos BYearEnd.normalize BYearEnd.rule_code + BYearEnd.n Methods ~~~~~~~ @@ -844,6 +869,7 @@ Properties BYearBegin.nanos BYearBegin.normalize BYearBegin.rule_code + BYearBegin.n Methods ~~~~~~~ @@ -877,6 +903,7 @@ Properties YearEnd.nanos YearEnd.normalize YearEnd.rule_code + YearEnd.n Methods ~~~~~~~ @@ -910,6 +937,7 @@ Properties YearBegin.nanos YearBegin.normalize YearBegin.rule_code + YearBegin.n Methods ~~~~~~~ @@ -943,6 +971,7 @@ Properties FY5253.nanos FY5253.normalize FY5253.rule_code + FY5253.n Methods ~~~~~~~ @@ -977,6 +1006,7 @@ Properties FY5253Quarter.nanos FY5253Quarter.normalize FY5253Quarter.rule_code + FY5253Quarter.n Methods ~~~~~~~ @@ -1011,6 +1041,7 @@ Properties Easter.nanos Easter.normalize Easter.rule_code + Easter.n Methods ~~~~~~~ @@ -1044,6 +1075,7 @@ Properties Tick.nanos Tick.normalize Tick.rule_code + Tick.n Methods ~~~~~~~ @@ -1077,6 +1109,7 @@ Properties Day.nanos Day.normalize Day.rule_code + Day.n Methods ~~~~~~~ @@ -1110,6 +1143,7 @@ Properties Hour.nanos Hour.normalize Hour.rule_code + Hour.n Methods ~~~~~~~ @@ -1143,6 +1177,7 @@ Properties Minute.nanos Minute.normalize Minute.rule_code + Minute.n Methods ~~~~~~~ @@ -1176,6 +1211,7 @@ Properties Second.nanos Second.normalize Second.rule_code + Second.n Methods ~~~~~~~ @@ -1209,6 +1245,7 @@ Properties Milli.nanos Milli.normalize Milli.rule_code + Milli.n Methods ~~~~~~~ @@ -1242,6 +1279,7 @@ Properties Micro.nanos Micro.normalize Micro.rule_code + Micro.n Methods ~~~~~~~ @@ -1275,6 +1313,7 @@ Properties Nano.nanos Nano.normalize Nano.rule_code + Nano.n Methods ~~~~~~~ @@ -1309,6 +1348,7 @@ Properties BDay.normalize BDay.offset BDay.rule_code + BDay.n Methods ~~~~~~~ @@ -1345,6 +1385,7 @@ Properties BMonthEnd.nanos BMonthEnd.normalize BMonthEnd.rule_code + BMonthEnd.n Methods ~~~~~~~ @@ -1381,6 +1422,7 @@ Properties BMonthBegin.nanos BMonthBegin.normalize BMonthBegin.rule_code + BMonthBegin.n Methods ~~~~~~~ @@ -1421,6 +1463,7 @@ Properties CBMonthEnd.normalize CBMonthEnd.offset CBMonthEnd.rule_code + CBMonthEnd.n Methods ~~~~~~~ @@ -1461,6 +1504,7 @@ Properties CBMonthBegin.normalize CBMonthBegin.offset CBMonthBegin.rule_code + CBMonthBegin.n Methods ~~~~~~~ @@ -1498,6 +1542,7 @@ Properties CDay.normalize CDay.offset CDay.rule_code + CDay.n Methods ~~~~~~~ From fb8bc5c62f027762b95fb63aa1cc4bad5d8c7ca1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 21:49:30 -0700 Subject: [PATCH 04/10] mypy fixup --- pandas/compat/pickle_compat.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 0e82dbdee0379..8a2626f9a7e68 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -251,12 +251,20 @@ def load(fh, encoding: Optional[str] = None, is_verbose: bool = False): raise -def loads(obj: bytes): +def loads( + bytes_object: bytes, + *, + fix_imports: bool = True, + encoding: str = "ASCII", + errors: str = "strict", +): """ Analogous to pickle._loads. """ - fd = io.BytesIO(obj) - return Unpickler(fd).load() + fd = io.BytesIO(bytes_object) + return Unpickler( + fd, fix_imports=fix_imports, encoding=encoding, errors=errors + ).load() @contextlib.contextmanager From f8d10d72227de2d04dad02f94f2811433b0b2392 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 May 2020 10:44:57 -0700 Subject: [PATCH 05/10] re-implement as_timestamp --- pandas/_libs/tslibs/offsets.pyx | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 0a42272901155..dddc6dc3e8041 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -111,6 +111,14 @@ def as_datetime(obj: datetime) -> datetime: return obj +cdef ABCTimestamp as_timestamp(obj): + if isinstance(obj, ABCTimestamp): + return obj + + from pandas import Timestamp + return Timestamp(obj) + + cpdef bint is_normalized(datetime dt): if dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0: # Regardless of whether dt is datetime vs Timestamp @@ -157,15 +165,13 @@ def apply_wraps(func): # not play nicely with cython class methods def wrapper(self, other): - # TODO: try to avoid runtime/circular import - from pandas import Timestamp if other is NaT: return NaT elif isinstance(other, (timedelta, BaseOffset)): # timedelta path return func(self, other) elif isinstance(other, (np.datetime64, datetime, date)): - other = Timestamp(other) + other = as_timestamp(other) else: # This will end up returning NotImplemented back in __add__ raise ApplyTypeError @@ -178,7 +184,7 @@ def apply_wraps(func): result = func(self, other) - result = Timestamp(result) + result = as_timestamp(result) if self._adjust_dst: result = result.tz_localize(tz) @@ -193,7 +199,7 @@ def apply_wraps(func): value = result.tz_localize(None).value else: value = result.value - result = Timestamp(value + nano) + result = as_timestamp(value + nano) if tz is not None and result.tzinfo is None: result = result.tz_localize(tz) @@ -618,9 +624,7 @@ cdef class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ - # TODO: try to avoid runtime/circular import - from pandas import Timestamp - dt = Timestamp(dt) + dt = as_timestamp(dt) if not self.is_on_offset(dt): dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) return dt @@ -634,9 +638,7 @@ cdef class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ - # TODO: try to avoid runtime/circular import - from pandas import Timestamp - dt = Timestamp(dt) + dt = as_timestamp(dt) if not self.is_on_offset(dt): dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) return dt From 7150f8a8377edaca503f42d7e806bfc97ecf26e1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 May 2020 12:06:51 -0700 Subject: [PATCH 06/10] restore caching --- pandas/tseries/offsets.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 9ee924dc419be..860511c8f88bf 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -185,9 +185,6 @@ def __add__(date): Timestamp('2017-03-01 09:10:11') """ - # FIXME: restore these as cache_readonly - # _params = cache_readonly(BaseOffset._params.__get__) - # freqstr = cache_readonly(BaseOffset.freqstr.__get__) _attributes = frozenset(["n", "normalize"] + list(liboffsets.relativedelta_kwds)) _adjust_dst = False @@ -296,12 +293,18 @@ def is_on_offset(self, dt): # TODO, see #1395 return True + @cache_readonly + def _params(self): + # TODO: see if we can just write cache_readonly(BaseOffset._params.__get__) + return BaseOffset._params.__get__(self) -class SingleConstructorOffset(BaseOffset): - # FIXME: restore these as cache_readonly - # _params = cache_readonly(BaseOffset._params.__get__) - # freqstr = cache_readonly(BaseOffset.freqstr.__get__) + @cache_readonly + def freqstr(self): + # TODO: see if we can just write cache_readonly(BaseOffset.freqstr.__get__) + return BaseOffset.freqstr.__get__(self) + +class SingleConstructorOffset(BaseOffset): @classmethod def _from_name(cls, suffix=None): # default _from_name calls cls with no args @@ -309,6 +312,16 @@ def _from_name(cls, suffix=None): raise ValueError(f"Bad freq suffix {suffix}") return cls() + @cache_readonly + def _params(self): + # TODO: see if we can just write cache_readonly(BaseOffset._params.__get__) + return BaseOffset._params.__get__(self) + + @cache_readonly + def freqstr(self): + # TODO: see if we can just write cache_readonly(BaseOffset.freqstr.__get__) + return BaseOffset.freqstr.__get__(self) + class BusinessDay(BusinessMixin, SingleConstructorOffset): """ From 1b3b3e9a5fbe777ae7aaacef8c19749305ef0cd0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 May 2020 12:18:47 -0700 Subject: [PATCH 07/10] unxfail --- .../tests/scalar/timedelta/test_arithmetic.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 2114962cfc0bd..140ea281baa6e 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest import pandas as pd -from pandas import NaT, Timedelta, Timestamp, _is_numpy_dev, compat, offsets +from pandas import NaT, Timedelta, Timestamp, offsets import pandas._testing as tm from pandas.core import ops @@ -416,20 +416,7 @@ def test_td_div_numeric_scalar(self): assert result == Timedelta(days=2) @pytest.mark.parametrize( - "nan", - [ - np.nan, - pytest.param( - np.float64("NaN"), - marks=pytest.mark.xfail( - # Works on numpy dev only in python 3.9 - _is_numpy_dev and not compat.PY39, - raises=RuntimeWarning, - reason="https://github.com/pandas-dev/pandas/issues/31992", - ), - ), - float("nan"), - ], + "nan", [np.nan, np.float64("NaN"), float("nan")], ) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array From 965d99a3a49df586cec54e8609577aa11c40fdfc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 May 2020 14:21:52 -0700 Subject: [PATCH 08/10] revert --- .../tests/scalar/timedelta/test_arithmetic.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 140ea281baa6e..2114962cfc0bd 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest import pandas as pd -from pandas import NaT, Timedelta, Timestamp, offsets +from pandas import NaT, Timedelta, Timestamp, _is_numpy_dev, compat, offsets import pandas._testing as tm from pandas.core import ops @@ -416,7 +416,20 @@ def test_td_div_numeric_scalar(self): assert result == Timedelta(days=2) @pytest.mark.parametrize( - "nan", [np.nan, np.float64("NaN"), float("nan")], + "nan", + [ + np.nan, + pytest.param( + np.float64("NaN"), + marks=pytest.mark.xfail( + # Works on numpy dev only in python 3.9 + _is_numpy_dev and not compat.PY39, + raises=RuntimeWarning, + reason="https://github.com/pandas-dev/pandas/issues/31992", + ), + ), + float("nan"), + ], ) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array From e4146662024b9b2e0b2e1f04e88fd7ed30da81e4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 17 May 2020 14:37:35 -0700 Subject: [PATCH 09/10] comment --- pandas/io/pytables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f4336a981f2a1..36cd61b6c3adb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -730,6 +730,8 @@ def get(self, key: str): Same type as object stored in file. """ with patch_pickle(): + # GH#31167 Without this patch, pickle doesn't know how to unpickle + # old DateOffset objects now that they are cdef classes. group = self.get_node(key) if group is None: raise KeyError(f"No object named {key} in the file") From 1abc5acf5949c8dbc32c348ba8a5ea1a3a9425e3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 18 May 2020 07:16:19 -0700 Subject: [PATCH 10/10] remove as_timestamp --- pandas/_libs/tslibs/offsets.pyx | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 5e368c42b6dee..b25558e8572fe 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -111,14 +111,6 @@ def as_datetime(obj: datetime) -> datetime: return obj -cdef ABCTimestamp as_timestamp(obj): - if isinstance(obj, ABCTimestamp): - return obj - - from pandas import Timestamp - return Timestamp(obj) - - cpdef bint is_normalized(datetime dt): if dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0: # Regardless of whether dt is datetime vs Timestamp @@ -165,13 +157,15 @@ def apply_wraps(func): # not play nicely with cython class methods def wrapper(self, other): + from pandas import Timestamp + if other is NaT: return NaT elif isinstance(other, (timedelta, BaseOffset)): # timedelta path return func(self, other) elif isinstance(other, (datetime, date)) or is_datetime64_object(other): - other = as_timestamp(other) + other = Timestamp(other) else: # This will end up returning NotImplemented back in __add__ raise ApplyTypeError @@ -184,7 +178,7 @@ def apply_wraps(func): result = func(self, other) - result = as_timestamp(result) + result = Timestamp(result) if self._adjust_dst: result = result.tz_localize(tz) @@ -199,7 +193,7 @@ def apply_wraps(func): value = result.tz_localize(None).value else: value = result.value - result = as_timestamp(value + nano) + result = Timestamp(value + nano) if tz is not None and result.tzinfo is None: result = result.tz_localize(tz) @@ -624,7 +618,8 @@ cdef class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ - dt = as_timestamp(dt) + from pandas import Timestamp + dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) return dt @@ -638,7 +633,8 @@ cdef class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ - dt = as_timestamp(dt) + from pandas import Timestamp + dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) return dt