From 55afda4d0d13b512f83f49587266d069626d81cd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 16:09:13 -0700 Subject: [PATCH 01/13] REF: make BaseOffset a cdef class --- pandas/_libs/tslibs/base.pxd | 5 -- pandas/_libs/tslibs/base.pyx | 8 --- pandas/_libs/tslibs/offsets.pxd | 1 + pandas/_libs/tslibs/offsets.pyx | 62 +++++++++++++------ pandas/_libs/tslibs/period.pyx | 4 +- pandas/_libs/tslibs/timedeltas.pyx | 3 +- pandas/_libs/tslibs/timestamps.pyx | 4 +- pandas/compat/pickle_compat.py | 4 +- .../tests/scalar/timestamp/test_arithmetic.py | 2 +- pandas/tseries/offsets.py | 27 ++++---- 10 files changed, 70 insertions(+), 50 deletions(-) diff --git a/pandas/_libs/tslibs/base.pxd b/pandas/_libs/tslibs/base.pxd index d32413c3bad86..0521279025d4f 100644 --- a/pandas/_libs/tslibs/base.pxd +++ b/pandas/_libs/tslibs/base.pxd @@ -8,13 +8,8 @@ cdef class ABCTimestamp(datetime): pass -cdef class ABCTick: - pass - - cdef class ABCPeriod: pass -cdef bint is_tick_object(object obj) cdef bint is_period_object(object obj) diff --git a/pandas/_libs/tslibs/base.pyx b/pandas/_libs/tslibs/base.pyx index 59daba4510b6e..91178fe3933f7 100644 --- a/pandas/_libs/tslibs/base.pyx +++ b/pandas/_libs/tslibs/base.pyx @@ -20,13 +20,5 @@ cdef class ABCPeriod: pass -cdef class ABCTick: - pass - - -cdef bint is_tick_object(object obj): - return isinstance(obj, ABCTick) - - cdef bint is_period_object(object obj): return isinstance(obj, ABCPeriod) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd index c6afb557ba2ef..e75cd8bdf1baf 100644 --- a/pandas/_libs/tslibs/offsets.pxd +++ b/pandas/_libs/tslibs/offsets.pxd @@ -1,2 +1,3 @@ cdef to_offset(object obj) cdef bint is_offset_object(object obj) +cdef bint is_tick_object(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 5efb9b3534f14..8618a1fc1d770 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -21,7 +21,7 @@ cnp.import_array() from pandas._libs.tslibs cimport util from pandas._libs.tslibs.util cimport is_integer_object -from pandas._libs.tslibs.base cimport ABCTick, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.ccalendar import MONTHS, DAYS from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek @@ -35,12 +35,9 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single -from pandas._libs.tslibs.timestamps import Timestamp - # --------------------------------------------------------------------- # Constants - _offset_to_period_map = { 'WEEKDAY': 'D', 'EOM': 'M', @@ -93,6 +90,10 @@ cdef bint is_offset_object(object obj): return isinstance(obj, _BaseOffset) +cdef bint is_tick_object(object obj): + return isinstance(obj, _Tick) + + cdef to_offset(object obj): """ Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime @@ -156,6 +157,8 @@ def apply_wraps(func): # not play nicely with cython class methods def wrapper(self, other): + # TODO: try to avoid runtime/circular import + from pandas import Timestamp if other is NaT: return NaT elif isinstance(other, (timedelta, BaseOffset)): @@ -395,7 +398,7 @@ class ApplyTypeError(TypeError): # --------------------------------------------------------------------- # Base Classes -class _BaseOffset: +cdef class _BaseOffset: """ Base class for DateOffset methods that are not overridden by subclasses and will (after pickle errors are resolved) go into a cdef class. @@ -406,16 +409,17 @@ class _BaseOffset: _use_relativedelta = False _adjust_dst = True _deprecations = frozenset(["isAnchored", "onOffset"]) - normalize = False # default for prior pickles + + cdef readonly: + int64_t n + bint normalize + dict _cache def __init__(self, n=1, normalize=False): n = self._validate_n(n) - object.__setattr__(self, "n", n) - object.__setattr__(self, "normalize", normalize) - object.__setattr__(self, "_cache", {}) - - def __setattr__(self, name, value): - raise AttributeError("DateOffset objects are immutable.") + self.n = n + self.normalize = normalize + self._cache = {} def __eq__(self, other: Any) -> bool: if isinstance(other, str): @@ -446,6 +450,8 @@ class _BaseOffset: """ # NB: non-cython subclasses override property with cache_readonly all_paras = self.__dict__.copy() + all_paras["n"] = self.n + all_paras["normalize"] = self.normalize if 'holidays' in all_paras and not all_paras['holidays']: all_paras.pop('holidays') exclude = ['kwds', 'name', 'calendar'] @@ -606,6 +612,8 @@ class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ + # TODO: try to avoid runtime/circular import + from pandas import Timestamp dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) @@ -620,6 +628,8 @@ class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ + # TODO: try to avoid runtime/circular import + from pandas import Timestamp dt = Timestamp(dt) if not self.is_on_offset(dt): dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) @@ -643,7 +653,8 @@ class _BaseOffset: # ------------------------------------------------------------------ - def _validate_n(self, n): + @staticmethod + def _validate_n(n): """ Require that `n` be an integer. @@ -686,9 +697,9 @@ class _BaseOffset: kwds = {key: odict[key] for key in odict if odict[key]} state.update(kwds) - if '_cache' not in state: - state['_cache'] = {} - + self.n = state.pop("n") + self.normalize = state.pop("normalize") + self._cache = state.pop("_cache", {}) self.__dict__.update(state) if 'weekmask' in state and 'holidays' in state: @@ -701,6 +712,8 @@ class _BaseOffset: def __getstate__(self): """Return a pickleable state""" state = self.__dict__.copy() + state["n"] = self.n + state["normalize"] = self.normalize # we don't want to actually pickle the calendar object # as its a np.busyday; we recreate on deserialization @@ -751,7 +764,7 @@ class BaseOffset(_BaseOffset): return (-self).__add__(other) -cdef class _Tick(ABCTick): +cdef class _Tick(_BaseOffset): """ dummy class to mix into tseries.offsets.Tick so that in tslibs.period we can do isinstance checks on _Tick and avoid importing tseries.offsets @@ -761,6 +774,18 @@ cdef class _Tick(ABCTick): __array_priority__ = 1000 _adjust_dst = False + def __init__(self, n=1, normalize=False): + n = _BaseOffset._validate_n(n) + self.n = n + self.normalize = normalize + self._cache = {} + + if normalize: + # GH#21427 + raise ValueError( + "Tick offset with `normalize=True` are not allowed." + ) + def is_on_offset(self, dt) -> bool: return True @@ -779,7 +804,8 @@ cdef class _Tick(ABCTick): return (type(self), (self.n,)) def __setstate__(self, state): - object.__setattr__(self, "n", state["n"]) + self.n = state["n"] + self.normalize = False class BusinessMixin: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c5be5b1d96469..a6aea912eac39 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -37,7 +37,7 @@ cdef extern from "src/datetime/np_datetime.h": cimport pandas._libs.tslibs.util as util -from pandas._libs.tslibs.base cimport ABCPeriod, is_period_object, is_tick_object +from pandas._libs.tslibs.base cimport ABCPeriod, is_period_object from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info @@ -68,7 +68,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) -from pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.offsets cimport to_offset, is_tick_object from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 03419a6267983..37c6e836c1140 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -21,7 +21,7 @@ from pandas._libs.tslibs.util cimport ( is_float_object, is_array ) -from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp from pandas._libs.tslibs.ccalendar cimport DAY_NANOS @@ -34,6 +34,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.offsets cimport is_tick_object # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index af98057378d7b..accb48027e8e0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -25,7 +25,7 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, is_array, ) -from pandas._libs.tslibs.base cimport ABCTimestamp, is_tick_object +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs cimport ccalendar @@ -40,7 +40,7 @@ from pandas._libs.tslibs.np_datetime cimport ( cmp_scalar, ) from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.offsets cimport to_offset, is_tick_object from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timezones cimport ( is_utc, maybe_get_tz, treat_tz_as_pytz, utc_pytz as UTC, diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index cd2ded874c08c..989fe1cbb0f99 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -9,7 +9,7 @@ from pandas import Index -from pandas.tseries.offsets import Tick +from pandas.tseries.offsets import DateOffset if TYPE_CHECKING: from pandas import Series, DataFrame @@ -40,7 +40,7 @@ def load_reduce(self): return except TypeError: pass - elif args and issubclass(args[0], Tick): + elif args and issubclass(args[0], DateOffset): # TypeError: object.__new__(Day) is not safe, use Day.__new__() cls = args[0] stack[-1] = cls.__new__(*args) diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index b038ee1aee106..ed0045bcab989 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -52,7 +52,7 @@ def test_overflow_offset_raises(self): # used to crash, so check for proper overflow exception stamp = Timestamp("2000/1/1") - offset_overflow = to_offset("D") * 100 ** 25 + offset_overflow = to_offset("D") * 100 ** 5 with pytest.raises(OverflowError, match=msg): stamp + offset_overflow diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 4912dc0eb349e..9ee924dc419be 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -185,8 +185,9 @@ def __add__(date): Timestamp('2017-03-01 09:10:11') """ - _params = cache_readonly(BaseOffset._params.fget) - freqstr = cache_readonly(BaseOffset.freqstr.fget) + # FIXME: restore these as cache_readonly + # _params = cache_readonly(BaseOffset._params.__get__) + # freqstr = cache_readonly(BaseOffset.freqstr.__get__) _attributes = frozenset(["n", "normalize"] + list(liboffsets.relativedelta_kwds)) _adjust_dst = False @@ -297,8 +298,9 @@ def is_on_offset(self, dt): class SingleConstructorOffset(BaseOffset): - _params = cache_readonly(BaseOffset._params.fget) - freqstr = cache_readonly(BaseOffset.freqstr.fget) + # FIXME: restore these as cache_readonly + # _params = cache_readonly(BaseOffset._params.__get__) + # freqstr = cache_readonly(BaseOffset.freqstr.__get__) @classmethod def _from_name(cls, suffix=None): @@ -316,6 +318,10 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _prefix = "B" _attributes = frozenset(["n", "normalize", "offset"]) + def __reduce__(self): + tup = (self.n, self.normalize, self.offset) + return type(self), tup + def __init__(self, n=1, normalize=False, offset=timedelta(0)): BaseOffset.__init__(self, n, normalize) object.__setattr__(self, "_offset", offset) @@ -718,6 +724,12 @@ class CustomBusinessDay(CustomMixin, BusinessDay): ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] ) + def __reduce__(self): + # np.holidaycalendar cant be pickled, so pass None there and + # it will be re-constructed within __init__ + tup = (self.n, self.normalize, self.weekmask, self.holidays, None, self.offset) + return type(self), tup + def __init__( self, n=1, @@ -2151,13 +2163,6 @@ class Tick(liboffsets._Tick, SingleConstructorOffset): _prefix = "undefined" _attributes = frozenset(["n", "normalize"]) - def __init__(self, n=1, normalize=False): - BaseOffset.__init__(self, n, normalize) - if normalize: - raise ValueError( - "Tick offset with `normalize=True` are not allowed." - ) # GH#21427 - __gt__ = _tick_comp(operator.gt) __ge__ = _tick_comp(operator.ge) __lt__ = _tick_comp(operator.lt) From 24685a50f770576bd37350aad069331985967778 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 20:08:46 -0700 Subject: [PATCH 02/13] Pickle patch --- pandas/compat/pickle_compat.py | 23 +++++++++++++++++++++++ pandas/io/pytables.py | 11 ++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 989fe1cbb0f99..0e82dbdee0379 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -2,7 +2,9 @@ Support pre-0.12 series pickle compatibility. """ +import contextlib import copy +import io import pickle as pkl from typing import TYPE_CHECKING, Optional import warnings @@ -247,3 +249,24 @@ def load(fh, encoding: Optional[str] = None, is_verbose: bool = False): return up.load() except (ValueError, TypeError): raise + + +def loads(obj: bytes): + """ + Analogous to pickle._loads. + """ + fd = io.BytesIO(obj) + return Unpickler(fd).load() + + +@contextlib.contextmanager +def patch_pickle(): + """ + Temporarily patch pickle to use our unpickler. + """ + orig_loads = pkl.loads + try: + pkl.loads = loads + yield + finally: + pkl.loads = orig_loads diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 345402e619ff2..a40ab3f7c63d4 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2,7 +2,6 @@ High level interface to PyTables for reading and writing pandas data structures to disk """ - import copy from datetime import date, tzinfo import itertools @@ -19,6 +18,7 @@ from pandas._libs.tslibs import timezones from pandas._typing import ArrayLike, FrameOrSeries, Label from pandas.compat._optional import import_optional_dependency +from pandas.compat.pickle_compat import patch_pickle from pandas.errors import PerformanceWarning from pandas.util._decorators import cache_readonly @@ -729,10 +729,11 @@ def get(self, key: str): object Same type as object stored in file. """ - group = self.get_node(key) - if group is None: - raise KeyError(f"No object named {key} in the file") - return self._read_group(group) + with patch_pickle(): + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + return self._read_group(group) def select( self, From b5ce30f36df7b1fb9315ab168c596e7956626e0b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 20:18:05 -0700 Subject: [PATCH 03/13] update doc --- doc/source/reference/offset_frequency.rst | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst index 6240181708f97..fb262af1e63f6 100644 --- a/doc/source/reference/offset_frequency.rst +++ b/doc/source/reference/offset_frequency.rst @@ -25,6 +25,7 @@ Properties DateOffset.nanos DateOffset.normalize DateOffset.rule_code + DateOffset.n Methods ~~~~~~~ @@ -57,6 +58,7 @@ Properties BusinessDay.nanos BusinessDay.normalize BusinessDay.rule_code + BusinessDay.n Methods ~~~~~~~ @@ -90,6 +92,7 @@ Properties BusinessHour.nanos BusinessHour.normalize BusinessHour.rule_code + BusinessHour.n Methods ~~~~~~~ @@ -122,6 +125,7 @@ Properties CustomBusinessDay.nanos CustomBusinessDay.normalize CustomBusinessDay.rule_code + CustomBusinessDay.n Methods ~~~~~~~ @@ -154,6 +158,7 @@ Properties CustomBusinessHour.nanos CustomBusinessHour.normalize CustomBusinessHour.rule_code + CustomBusinessHour.n Methods ~~~~~~~ @@ -186,6 +191,7 @@ Properties MonthOffset.nanos MonthOffset.normalize MonthOffset.rule_code + MonthOffset.n Methods ~~~~~~~ @@ -219,6 +225,7 @@ Properties MonthEnd.nanos MonthEnd.normalize MonthEnd.rule_code + MonthEnd.n Methods ~~~~~~~ @@ -252,6 +259,7 @@ Properties MonthBegin.nanos MonthBegin.normalize MonthBegin.rule_code + MonthBegin.n Methods ~~~~~~~ @@ -285,6 +293,7 @@ Properties BusinessMonthEnd.nanos BusinessMonthEnd.normalize BusinessMonthEnd.rule_code + BusinessMonthEnd.n Methods ~~~~~~~ @@ -318,6 +327,7 @@ Properties BusinessMonthBegin.nanos BusinessMonthBegin.normalize BusinessMonthBegin.rule_code + BusinessMonthBegin.n Methods ~~~~~~~ @@ -352,6 +362,7 @@ Properties CustomBusinessMonthEnd.nanos CustomBusinessMonthEnd.normalize CustomBusinessMonthEnd.rule_code + CustomBusinessMonthEnd.n Methods ~~~~~~~ @@ -385,6 +396,7 @@ Properties CustomBusinessMonthBegin.nanos CustomBusinessMonthBegin.normalize CustomBusinessMonthBegin.rule_code + CustomBusinessMonthBegin.n Methods ~~~~~~~ @@ -417,6 +429,7 @@ Properties SemiMonthOffset.nanos SemiMonthOffset.normalize SemiMonthOffset.rule_code + SemiMonthOffset.n Methods ~~~~~~~ @@ -450,6 +463,7 @@ Properties SemiMonthEnd.nanos SemiMonthEnd.normalize SemiMonthEnd.rule_code + SemiMonthEnd.n Methods ~~~~~~~ @@ -483,6 +497,7 @@ Properties SemiMonthBegin.nanos SemiMonthBegin.normalize SemiMonthBegin.rule_code + SemiMonthBegin.n Methods ~~~~~~~ @@ -516,6 +531,7 @@ Properties Week.nanos Week.normalize Week.rule_code + Week.n Methods ~~~~~~~ @@ -549,6 +565,7 @@ Properties WeekOfMonth.nanos WeekOfMonth.normalize WeekOfMonth.rule_code + WeekOfMonth.n Methods ~~~~~~~ @@ -581,6 +598,7 @@ Properties LastWeekOfMonth.nanos LastWeekOfMonth.normalize LastWeekOfMonth.rule_code + LastWeekOfMonth.n Methods ~~~~~~~ @@ -613,6 +631,7 @@ Properties QuarterOffset.nanos QuarterOffset.normalize QuarterOffset.rule_code + QuarterOffset.n Methods ~~~~~~~ @@ -646,6 +665,7 @@ Properties BQuarterEnd.nanos BQuarterEnd.normalize BQuarterEnd.rule_code + BQuarterEnd.n Methods ~~~~~~~ @@ -679,6 +699,7 @@ Properties BQuarterBegin.nanos BQuarterBegin.normalize BQuarterBegin.rule_code + BQuarterBegin.n Methods ~~~~~~~ @@ -712,6 +733,7 @@ Properties QuarterEnd.nanos QuarterEnd.normalize QuarterEnd.rule_code + QuarterEnd.n Methods ~~~~~~~ @@ -745,6 +767,7 @@ Properties QuarterBegin.nanos QuarterBegin.normalize QuarterBegin.rule_code + QuarterBegin.n Methods ~~~~~~~ @@ -778,6 +801,7 @@ Properties YearOffset.nanos YearOffset.normalize YearOffset.rule_code + YearOffset.n Methods ~~~~~~~ @@ -811,6 +835,7 @@ Properties BYearEnd.nanos BYearEnd.normalize BYearEnd.rule_code + BYearEnd.n Methods ~~~~~~~ @@ -844,6 +869,7 @@ Properties BYearBegin.nanos BYearBegin.normalize BYearBegin.rule_code + BYearBegin.n Methods ~~~~~~~ @@ -877,6 +903,7 @@ Properties YearEnd.nanos YearEnd.normalize YearEnd.rule_code + YearEnd.n Methods ~~~~~~~ @@ -910,6 +937,7 @@ Properties YearBegin.nanos YearBegin.normalize YearBegin.rule_code + YearBegin.n Methods ~~~~~~~ @@ -943,6 +971,7 @@ Properties FY5253.nanos FY5253.normalize FY5253.rule_code + FY5253.n Methods ~~~~~~~ @@ -977,6 +1006,7 @@ Properties FY5253Quarter.nanos FY5253Quarter.normalize FY5253Quarter.rule_code + FY5253Quarter.n Methods ~~~~~~~ @@ -1011,6 +1041,7 @@ Properties Easter.nanos Easter.normalize Easter.rule_code + Easter.n Methods ~~~~~~~ @@ -1044,6 +1075,7 @@ Properties Tick.nanos Tick.normalize Tick.rule_code + Tick.n Methods ~~~~~~~ @@ -1077,6 +1109,7 @@ Properties Day.nanos Day.normalize Day.rule_code + Day.n Methods ~~~~~~~ @@ -1110,6 +1143,7 @@ Properties Hour.nanos Hour.normalize Hour.rule_code + Hour.n Methods ~~~~~~~ @@ -1143,6 +1177,7 @@ Properties Minute.nanos Minute.normalize Minute.rule_code + Minute.n Methods ~~~~~~~ @@ -1176,6 +1211,7 @@ Properties Second.nanos Second.normalize Second.rule_code + Second.n Methods ~~~~~~~ @@ -1209,6 +1245,7 @@ Properties Milli.nanos Milli.normalize Milli.rule_code + Milli.n Methods ~~~~~~~ @@ -1242,6 +1279,7 @@ Properties Micro.nanos Micro.normalize Micro.rule_code + Micro.n Methods ~~~~~~~ @@ -1275,6 +1313,7 @@ Properties Nano.nanos Nano.normalize Nano.rule_code + Nano.n Methods ~~~~~~~ @@ -1309,6 +1348,7 @@ Properties BDay.normalize BDay.offset BDay.rule_code + BDay.n Methods ~~~~~~~ @@ -1345,6 +1385,7 @@ Properties BMonthEnd.nanos BMonthEnd.normalize BMonthEnd.rule_code + BMonthEnd.n Methods ~~~~~~~ @@ -1381,6 +1422,7 @@ Properties BMonthBegin.nanos BMonthBegin.normalize BMonthBegin.rule_code + BMonthBegin.n Methods ~~~~~~~ @@ -1421,6 +1463,7 @@ Properties CBMonthEnd.normalize CBMonthEnd.offset CBMonthEnd.rule_code + CBMonthEnd.n Methods ~~~~~~~ @@ -1461,6 +1504,7 @@ Properties CBMonthBegin.normalize CBMonthBegin.offset CBMonthBegin.rule_code + CBMonthBegin.n Methods ~~~~~~~ @@ -1498,6 +1542,7 @@ Properties CDay.normalize CDay.offset CDay.rule_code + CDay.n Methods ~~~~~~~ From fb8bc5c62f027762b95fb63aa1cc4bad5d8c7ca1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 May 2020 21:49:30 -0700 Subject: [PATCH 04/13] mypy fixup --- pandas/compat/pickle_compat.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 0e82dbdee0379..8a2626f9a7e68 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -251,12 +251,20 @@ def load(fh, encoding: Optional[str] = None, is_verbose: bool = False): raise -def loads(obj: bytes): +def loads( + bytes_object: bytes, + *, + fix_imports: bool = True, + encoding: str = "ASCII", + errors: str = "strict", +): """ Analogous to pickle._loads. """ - fd = io.BytesIO(obj) - return Unpickler(fd).load() + fd = io.BytesIO(bytes_object) + return Unpickler( + fd, fix_imports=fix_imports, encoding=encoding, errors=errors + ).load() @contextlib.contextmanager From f8d10d72227de2d04dad02f94f2811433b0b2392 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 May 2020 10:44:57 -0700 Subject: [PATCH 05/13] re-implement as_timestamp --- pandas/_libs/tslibs/offsets.pyx | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 0a42272901155..dddc6dc3e8041 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -111,6 +111,14 @@ def as_datetime(obj: datetime) -> datetime: return obj +cdef ABCTimestamp as_timestamp(obj): + if isinstance(obj, ABCTimestamp): + return obj + + from pandas import Timestamp + return Timestamp(obj) + + cpdef bint is_normalized(datetime dt): if dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0: # Regardless of whether dt is datetime vs Timestamp @@ -157,15 +165,13 @@ def apply_wraps(func): # not play nicely with cython class methods def wrapper(self, other): - # TODO: try to avoid runtime/circular import - from pandas import Timestamp if other is NaT: return NaT elif isinstance(other, (timedelta, BaseOffset)): # timedelta path return func(self, other) elif isinstance(other, (np.datetime64, datetime, date)): - other = Timestamp(other) + other = as_timestamp(other) else: # This will end up returning NotImplemented back in __add__ raise ApplyTypeError @@ -178,7 +184,7 @@ def apply_wraps(func): result = func(self, other) - result = Timestamp(result) + result = as_timestamp(result) if self._adjust_dst: result = result.tz_localize(tz) @@ -193,7 +199,7 @@ def apply_wraps(func): value = result.tz_localize(None).value else: value = result.value - result = Timestamp(value + nano) + result = as_timestamp(value + nano) if tz is not None and result.tzinfo is None: result = result.tz_localize(tz) @@ -618,9 +624,7 @@ cdef class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ - # TODO: try to avoid runtime/circular import - from pandas import Timestamp - dt = Timestamp(dt) + dt = as_timestamp(dt) if not self.is_on_offset(dt): dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) return dt @@ -634,9 +638,7 @@ cdef class _BaseOffset: TimeStamp Rolled timestamp if not on offset, otherwise unchanged timestamp. """ - # TODO: try to avoid runtime/circular import - from pandas import Timestamp - dt = Timestamp(dt) + dt = as_timestamp(dt) if not self.is_on_offset(dt): dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) return dt From 7150f8a8377edaca503f42d7e806bfc97ecf26e1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 May 2020 12:06:51 -0700 Subject: [PATCH 06/13] restore caching --- pandas/tseries/offsets.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 9ee924dc419be..860511c8f88bf 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -185,9 +185,6 @@ def __add__(date): Timestamp('2017-03-01 09:10:11') """ - # FIXME: restore these as cache_readonly - # _params = cache_readonly(BaseOffset._params.__get__) - # freqstr = cache_readonly(BaseOffset.freqstr.__get__) _attributes = frozenset(["n", "normalize"] + list(liboffsets.relativedelta_kwds)) _adjust_dst = False @@ -296,12 +293,18 @@ def is_on_offset(self, dt): # TODO, see #1395 return True + @cache_readonly + def _params(self): + # TODO: see if we can just write cache_readonly(BaseOffset._params.__get__) + return BaseOffset._params.__get__(self) -class SingleConstructorOffset(BaseOffset): - # FIXME: restore these as cache_readonly - # _params = cache_readonly(BaseOffset._params.__get__) - # freqstr = cache_readonly(BaseOffset.freqstr.__get__) + @cache_readonly + def freqstr(self): + # TODO: see if we can just write cache_readonly(BaseOffset.freqstr.__get__) + return BaseOffset.freqstr.__get__(self) + +class SingleConstructorOffset(BaseOffset): @classmethod def _from_name(cls, suffix=None): # default _from_name calls cls with no args @@ -309,6 +312,16 @@ def _from_name(cls, suffix=None): raise ValueError(f"Bad freq suffix {suffix}") return cls() + @cache_readonly + def _params(self): + # TODO: see if we can just write cache_readonly(BaseOffset._params.__get__) + return BaseOffset._params.__get__(self) + + @cache_readonly + def freqstr(self): + # TODO: see if we can just write cache_readonly(BaseOffset.freqstr.__get__) + return BaseOffset.freqstr.__get__(self) + class BusinessDay(BusinessMixin, SingleConstructorOffset): """ From 1b3b3e9a5fbe777ae7aaacef8c19749305ef0cd0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 May 2020 12:18:47 -0700 Subject: [PATCH 07/13] unxfail --- .../tests/scalar/timedelta/test_arithmetic.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 2114962cfc0bd..140ea281baa6e 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest import pandas as pd -from pandas import NaT, Timedelta, Timestamp, _is_numpy_dev, compat, offsets +from pandas import NaT, Timedelta, Timestamp, offsets import pandas._testing as tm from pandas.core import ops @@ -416,20 +416,7 @@ def test_td_div_numeric_scalar(self): assert result == Timedelta(days=2) @pytest.mark.parametrize( - "nan", - [ - np.nan, - pytest.param( - np.float64("NaN"), - marks=pytest.mark.xfail( - # Works on numpy dev only in python 3.9 - _is_numpy_dev and not compat.PY39, - raises=RuntimeWarning, - reason="https://github.com/pandas-dev/pandas/issues/31992", - ), - ), - float("nan"), - ], + "nan", [np.nan, np.float64("NaN"), float("nan")], ) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array From 965d99a3a49df586cec54e8609577aa11c40fdfc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 May 2020 14:21:52 -0700 Subject: [PATCH 08/13] revert --- .../tests/scalar/timedelta/test_arithmetic.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 140ea281baa6e..2114962cfc0bd 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest import pandas as pd -from pandas import NaT, Timedelta, Timestamp, offsets +from pandas import NaT, Timedelta, Timestamp, _is_numpy_dev, compat, offsets import pandas._testing as tm from pandas.core import ops @@ -416,7 +416,20 @@ def test_td_div_numeric_scalar(self): assert result == Timedelta(days=2) @pytest.mark.parametrize( - "nan", [np.nan, np.float64("NaN"), float("nan")], + "nan", + [ + np.nan, + pytest.param( + np.float64("NaN"), + marks=pytest.mark.xfail( + # Works on numpy dev only in python 3.9 + _is_numpy_dev and not compat.PY39, + raises=RuntimeWarning, + reason="https://github.com/pandas-dev/pandas/issues/31992", + ), + ), + float("nan"), + ], ) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array From e4146662024b9b2e0b2e1f04e88fd7ed30da81e4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 17 May 2020 14:37:35 -0700 Subject: [PATCH 09/13] comment --- pandas/io/pytables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f4336a981f2a1..36cd61b6c3adb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -730,6 +730,8 @@ def get(self, key: str): Same type as object stored in file. """ with patch_pickle(): + # GH#31167 Without this patch, pickle doesn't know how to unpickle + # old DateOffset objects now that they are cdef classes. group = self.get_node(key) if group is None: raise KeyError(f"No object named {key} in the file") From cbd7a9177ccd53717c81bac3609729df9380e766 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 17 May 2020 11:27:46 -0700 Subject: [PATCH 10/13] REF: make Tick entirely cdef --- pandas/_libs/tslibs/offsets.pyx | 176 +++++++++++++++++---- pandas/core/arrays/period.py | 3 +- pandas/tests/tseries/offsets/test_ticks.py | 8 +- pandas/tseries/offsets.py | 124 ++------------- 4 files changed, 163 insertions(+), 148 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 5e368c42b6dee..84dcc8aae9c0e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -5,6 +5,7 @@ from typing import Any import warnings from cpython.datetime cimport (PyDateTime_IMPORT, PyDateTime_Check, + PyDate_Check, PyDelta_Check, datetime, timedelta, date, time as dt_time) @@ -35,6 +36,8 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single +from .timedeltas cimport delta_to_nanoseconds + # --------------------------------------------------------------------- # Constants @@ -87,11 +90,11 @@ for _d in DAYS: # Misc Helpers cdef bint is_offset_object(object obj): - return isinstance(obj, _BaseOffset) + return isinstance(obj, BaseOffset) cdef bint is_tick_object(object obj): - return isinstance(obj, _Tick) + return isinstance(obj, Tick) cdef to_offset(object obj): @@ -99,7 +102,7 @@ cdef to_offset(object obj): Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime imports """ - if isinstance(obj, _BaseOffset): + if isinstance(obj, BaseOffset): return obj from pandas.tseries.frequencies import to_offset return to_offset(obj) @@ -167,10 +170,11 @@ def apply_wraps(func): def wrapper(self, other): if other is NaT: return NaT - elif isinstance(other, (timedelta, BaseOffset)): + elif isinstance(other, BaseOffset) or PyDelta_Check(other): # timedelta path return func(self, other) - elif isinstance(other, (datetime, date)) or is_datetime64_object(other): + elif is_datetime64_object(other) or PyDate_Check(other): + # PyDate_Check includes date, datetime other = as_timestamp(other) else: # This will end up returning NotImplemented back in __add__ @@ -233,7 +237,6 @@ cdef _wrap_timedelta_result(result): """ if PyDelta_Check(result): # convert Timedelta back to a Tick - from pandas.tseries.offsets import delta_to_tick return delta_to_tick(result) return result @@ -404,7 +407,7 @@ class ApplyTypeError(TypeError): # --------------------------------------------------------------------- # Base Classes -cdef class _BaseOffset: +cdef class BaseOffset: """ Base class for DateOffset methods that are not overridden by subclasses and will (after pickle errors are resolved) go into a cdef class. @@ -483,6 +486,9 @@ cdef class _BaseOffset: return type(self)(n=1, normalize=self.normalize, **self.kwds) def __add__(self, other): + if not isinstance(self, BaseOffset): + # cython semantics; this is __radd__ + return other.__add__(self) try: return self.apply(other) except ApplyTypeError: @@ -494,6 +500,9 @@ cdef class _BaseOffset: elif type(other) == type(self): return type(self)(self.n - other.n, normalize=self.normalize, **self.kwds) + elif not isinstance(self, BaseOffset): + # cython semantics, this is __rsub__ + return (-other).__add__(self) else: # pragma: no cover return NotImplemented @@ -512,6 +521,9 @@ cdef class _BaseOffset: elif is_integer_object(other): return type(self)(n=other * self.n, normalize=self.normalize, **self.kwds) + elif not isinstance(self, BaseOffset): + # cython semantics, this is __rmul__ + return other.__mul__(self) return NotImplemented def __neg__(self): @@ -661,8 +673,8 @@ cdef class _BaseOffset: # ------------------------------------------------------------------ - # Staticmethod so we can call from _Tick.__init__, will be unnecessary - # once BaseOffset is a cdef class and is inherited by _Tick + # Staticmethod so we can call from Tick.__init__, will be unnecessary + # once BaseOffset is a cdef class and is inherited by Tick @staticmethod def _validate_n(n): """ @@ -762,23 +774,7 @@ cdef class _BaseOffset: return self.n == 1 -class BaseOffset(_BaseOffset): - # Here we add __rfoo__ methods that don't play well with cdef classes - def __rmul__(self, other): - return self.__mul__(other) - - def __radd__(self, other): - return self.__add__(other) - - def __rsub__(self, other): - return (-self).__add__(other) - - -cdef class _Tick(_BaseOffset): - """ - dummy class to mix into tseries.offsets.Tick so that in tslibs.period we - can do isinstance checks on _Tick and avoid importing tseries.offsets - """ +cdef class Tick(BaseOffset): # ensure that reversed-ops with numpy scalars return NotImplemented __array_priority__ = 1000 @@ -797,13 +793,25 @@ cdef class _Tick(_BaseOffset): "Tick offset with `normalize=True` are not allowed." ) + @classmethod + def _from_name(cls, suffix=None): + # default _from_name calls cls with no args + if suffix: + raise ValueError(f"Bad freq suffix {suffix}") + return cls() + + def _repr_attrs(self) -> str: + # Since cdef classes have no __dict__, we need to override + return "" + @property def delta(self): - return self.n * self._inc + from .timedeltas import Timedelta + return self.n * Timedelta(self._nanos_inc) @property def nanos(self) -> int64_t: - return self.delta.value + return self.n * self._nanos_inc def is_on_offset(self, dt) -> bool: return True @@ -841,13 +849,62 @@ cdef class _Tick(_BaseOffset): return self.delta.__gt__(other) def __truediv__(self, other): - if not isinstance(self, _Tick): + if not isinstance(self, Tick): # cython semantics mean the args are sometimes swapped result = other.delta.__rtruediv__(self) else: result = self.delta.__truediv__(other) return _wrap_timedelta_result(result) + def __add__(self, other): + if not isinstance(self, Tick): + # cython semantics; this is __radd__ + return other.__add__(self) + + if isinstance(other, Tick): + if type(self) == type(other): + return type(self)(self.n + other.n) + else: + return delta_to_tick(self.delta + other.delta) + try: + return self.apply(other) + except ApplyTypeError: + # Includes pd.Period + return NotImplemented + except OverflowError as err: + raise OverflowError( + f"the add operation between {self} and {other} will overflow" + ) from err + + def apply(self, other): + # Timestamp can handle tz and nano sec, thus no need to use apply_wraps + if isinstance(other, ABCTimestamp): + + # GH#15126 + # in order to avoid a recursive + # call of __add__ and __radd__ if there is + # an exception, when we call using the + operator, + # we directly call the known method + result = other.__add__(self) + if result is NotImplemented: + raise OverflowError + return result + elif other is NaT: + return NaT + elif is_datetime64_object(other) or PyDate_Check(other): + # PyDate_Check includes date, datetime + return as_timestamp(other) + self + + if PyDelta_Check(other): + return other + self.delta + elif isinstance(other, type(self)): + # TODO: this is reached in tests that specifically call apply, + # but should not be reached "naturally" because __add__ should + # catch this case first. + return type(self)(self.n + other.n) + + raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") + # -------------------------------------------------------------------- # Pickle Methods @@ -859,6 +916,67 @@ cdef class _Tick(_BaseOffset): self.normalize = False +cdef class Day(Tick): + _nanos_inc = 24 * 3600 * 1_000_000_000 + _prefix = "D" + + +cdef class Hour(Tick): + _nanos_inc = 3600 * 1_000_000_000 + _prefix = "H" + + +cdef class Minute(Tick): + _nanos_inc = 60 * 1_000_000_000 + _prefix = "T" + + +cdef class Second(Tick): + _nanos_inc = 1_000_000_000 + _prefix = "S" + + +cdef class Milli(Tick): + _nanos_inc = 1_000_000 + _prefix = "L" + + +cdef class Micro(Tick): + _nanos_inc = 1000 + _prefix = "U" + + +cdef class Nano(Tick): + _nanos_inc = 1 + _prefix = "N" + + +def delta_to_tick(delta: timedelta) -> Tick: + if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: + # nanoseconds only for pd.Timedelta + if delta.seconds == 0: + return Day(delta.days) + else: + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(seconds / 3600) + elif seconds % 60 == 0: + return Minute(seconds / 60) + else: + return Second(seconds) + else: + nanos = delta_to_nanoseconds(delta) + if nanos % 1_000_000 == 0: + return Milli(nanos // 1_000_000) + elif nanos % 1000 == 0: + return Micro(nanos // 1000) + else: # pragma: no cover + return Nano(nanos) + + +# -------------------------------------------------------------------- + + class BusinessMixin(BaseOffset): """ Mixin to business types to provide related functions. diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 3978161829481..7a7fdba88cda1 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -12,6 +12,7 @@ period as libperiod, ) from pandas._libs.tslibs.fields import isleapyear_arr +from pandas._libs.tslibs.offsets import Tick, delta_to_tick from pandas._libs.tslibs.period import ( DIFFERENT_FREQ, IncompatibleFrequency, @@ -45,7 +46,7 @@ import pandas.core.common as com from pandas.tseries import frequencies -from pandas.tseries.offsets import DateOffset, Tick, delta_to_tick +from pandas.tseries.offsets import DateOffset def _field_accessor(name: str, alias: int, docstring=None): diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index a37dbbc89f5af..e5b0142dae48b 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -7,6 +7,8 @@ import numpy as np import pytest +from pandas._libs.tslibs.offsets import delta_to_tick + from pandas import Timedelta, Timestamp import pandas._testing as tm @@ -33,11 +35,11 @@ def test_apply_ticks(): def test_delta_to_tick(): delta = timedelta(3) - tick = offsets.delta_to_tick(delta) + tick = delta_to_tick(delta) assert tick == offsets.Day(3) td = Timedelta(nanoseconds=5) - tick = offsets.delta_to_tick(td) + tick = delta_to_tick(td) assert tick == Nano(5) @@ -234,7 +236,7 @@ def test_tick_division(cls): assert not isinstance(result, cls) assert result.delta == off.delta / 1000 - if cls._inc < Timedelta(seconds=1): + if cls._nanos_inc < Timedelta(seconds=1).value: # Case where we end up with a bigger class result = off / 0.001 assert isinstance(result, offsets.Tick) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 364a50be5c291..2d2d0a8523db6 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -6,20 +6,26 @@ import numpy as np from pandas._libs.tslibs import ( - Period, Timedelta, Timestamp, ccalendar, conversion, - delta_to_nanoseconds, frequencies as libfrequencies, offsets as liboffsets, ) -from pandas._libs.tslibs.offsets import ( +from pandas._libs.tslibs.offsets import ( # noqa:F401 ApplyTypeError, BaseOffset, BusinessMixin, CustomMixin, + Day, + Hour, + Micro, + Milli, + Minute, + Nano, + Second, + Tick, apply_index_wraps, apply_wraps, as_datetime, @@ -2125,118 +2131,6 @@ def is_on_offset(self, dt: datetime) -> bool: # --------------------------------------------------------------------- -# Ticks - - -class Tick(liboffsets._Tick, SingleConstructorOffset): - _inc = Timedelta(microseconds=1000) - - def __add__(self, other): - if isinstance(other, Tick): - if type(self) == type(other): - return type(self)(self.n + other.n) - else: - return delta_to_tick(self.delta + other.delta) - elif isinstance(other, Period): - return other + self - try: - return self.apply(other) - except ApplyTypeError: - return NotImplemented - except OverflowError as err: - raise OverflowError( - f"the add operation between {self} and {other} will overflow" - ) from err - - # This is identical to DateOffset.__hash__, but has to be redefined here - # for Python 3, because we've redefined __eq__. - def __hash__(self) -> int: - return hash(self._params) - - def apply(self, other): - # Timestamp can handle tz and nano sec, thus no need to use apply_wraps - if isinstance(other, Timestamp): - - # GH 15126 - # in order to avoid a recursive - # call of __add__ and __radd__ if there is - # an exception, when we call using the + operator, - # we directly call the known method - result = other.__add__(self) - if result is NotImplemented: - raise OverflowError - return result - elif isinstance(other, (datetime, np.datetime64, date)): - return Timestamp(other) + self - - if isinstance(other, timedelta): - return other + self.delta - elif isinstance(other, type(self)): - # TODO: this is reached in tests that specifically call apply, - # but should not be reached "naturally" because __add__ should - # catch this case first. - return type(self)(self.n + other.n) - - raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") - - -def delta_to_tick(delta: timedelta) -> Tick: - if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: - # nanoseconds only for pd.Timedelta - if delta.seconds == 0: - return Day(delta.days) - else: - seconds = delta.days * 86400 + delta.seconds - if seconds % 3600 == 0: - return Hour(seconds / 3600) - elif seconds % 60 == 0: - return Minute(seconds / 60) - else: - return Second(seconds) - else: - nanos = delta_to_nanoseconds(delta) - if nanos % 1_000_000 == 0: - return Milli(nanos // 1_000_000) - elif nanos % 1000 == 0: - return Micro(nanos // 1000) - else: # pragma: no cover - return Nano(nanos) - - -class Day(Tick): - _inc = Timedelta(days=1) - _prefix = "D" - - -class Hour(Tick): - _inc = Timedelta(hours=1) - _prefix = "H" - - -class Minute(Tick): - _inc = Timedelta(minutes=1) - _prefix = "T" - - -class Second(Tick): - _inc = Timedelta(seconds=1) - _prefix = "S" - - -class Milli(Tick): - _inc = Timedelta(milliseconds=1) - _prefix = "L" - - -class Micro(Tick): - _inc = Timedelta(microseconds=1) - _prefix = "U" - - -class Nano(Tick): - _inc = Timedelta(nanoseconds=1) - _prefix = "N" - BDay = BusinessDay BMonthEnd = BusinessMonthEnd From 052fb6eaff296fd7d6611ffda64fcd41f393784e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 17 May 2020 11:55:48 -0700 Subject: [PATCH 11/13] mypy fixup --- pandas/tseries/frequencies.py | 2 +- pandas/tseries/offsets.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d95ffd5b0876d..e764f2a40e781 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -219,7 +219,7 @@ def _get_offset(name: str) -> DateOffset: klass = prefix_mapping[split[0]] # handles case where there's no suffix (and will TypeError if too # many '-') - offset = klass._from_name(*split[1:]) # type: ignore + offset = klass._from_name(*split[1:]) except (ValueError, TypeError, KeyError) as err: # bad prefix or suffix raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name)) from err diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2d2d0a8523db6..3dd5f2a2fc4c8 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2140,7 +2140,7 @@ def is_on_offset(self, dt: datetime) -> bool: CDay = CustomBusinessDay prefix_mapping = { - offset._prefix: offset # type: ignore + offset._prefix: offset for offset in [ YearBegin, # 'AS' YearEnd, # 'A' From 3e7d13f66365c90a77125252db4ee90d400f3695 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 17 May 2020 18:15:31 -0700 Subject: [PATCH 12/13] mypy fixup --- pandas/tseries/frequencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e764f2a40e781..fa41ce00c4559 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -165,7 +165,7 @@ def to_offset(freq) -> Optional[DateOffset]: ) stride = int(stride) offset = _get_offset(name) - offset = offset * int(np.fabs(stride) * stride_sign) # type: ignore + offset = offset * int(np.fabs(stride) * stride_sign) if delta is None: delta = offset else: From 04631a204a4d11a840e811495d374a83ff67b707 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 18 May 2020 12:25:51 -0700 Subject: [PATCH 13/13] re-remove as_timestamp --- pandas/_libs/tslibs/offsets.pyx | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 0bd2533dc1c2e..17ea389611b84 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -114,14 +114,6 @@ def as_datetime(obj: datetime) -> datetime: return obj -cdef ABCTimestamp as_timestamp(obj): - if isinstance(obj, ABCTimestamp): - return obj - - from pandas import Timestamp - return Timestamp(obj) - - cpdef bint is_normalized(datetime dt): if dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0: # Regardless of whether dt is datetime vs Timestamp @@ -177,7 +169,7 @@ def apply_wraps(func): return func(self, other) elif is_datetime64_object(other) or PyDate_Check(other): # PyDate_Check includes date, datetime - other = as_timestamp(other) + other = Timestamp(other) else: # This will end up returning NotImplemented back in __add__ raise ApplyTypeError @@ -190,7 +182,7 @@ def apply_wraps(func): result = func(self, other) - result = as_timestamp(result) + result = Timestamp(result) if self._adjust_dst: result = result.tz_localize(tz) @@ -205,7 +197,7 @@ def apply_wraps(func): value = result.tz_localize(None).value else: value = result.value - result = as_timestamp(value + nano) + result = Timestamp(value + nano) if tz is not None and result.tzinfo is None: result = result.tz_localize(tz) @@ -896,7 +888,8 @@ cdef class Tick(BaseOffset): return NaT elif is_datetime64_object(other) or PyDate_Check(other): # PyDate_Check includes date, datetime - return as_timestamp(other) + self + from pandas import Timestamp + return Timestamp(other) + self if PyDelta_Check(other): return other + self.delta