Skip to content

REF: make Tick entirely a cdef class #34227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
May 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 148 additions & 30 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ from typing import Any
import warnings
from cpython.datetime cimport (PyDateTime_IMPORT,
PyDateTime_Check,
PyDate_Check,
PyDelta_Check,
datetime, timedelta, date,
time as dt_time)
Expand Down Expand Up @@ -35,6 +36,8 @@ from pandas._libs.tslibs.np_datetime cimport (
from pandas._libs.tslibs.timezones cimport utc_pytz as UTC
from pandas._libs.tslibs.tzconversion cimport tz_convert_single

from .timedeltas cimport delta_to_nanoseconds

# ---------------------------------------------------------------------
# Constants

Expand Down Expand Up @@ -87,19 +90,19 @@ for _d in DAYS:
# Misc Helpers

cdef bint is_offset_object(object obj):
return isinstance(obj, _BaseOffset)
return isinstance(obj, BaseOffset)


cdef bint is_tick_object(object obj):
return isinstance(obj, _Tick)
return isinstance(obj, Tick)


cdef to_offset(object obj):
"""
Wrap pandas.tseries.frequencies.to_offset to keep runtime imports
centralized
"""
if isinstance(obj, _BaseOffset):
if isinstance(obj, BaseOffset):
return obj
from pandas.tseries.frequencies import to_offset
return to_offset(obj)
Expand Down Expand Up @@ -161,10 +164,11 @@ def apply_wraps(func):

if other is NaT:
return NaT
elif isinstance(other, (timedelta, BaseOffset)):
elif isinstance(other, BaseOffset) or PyDelta_Check(other):
# timedelta path
return func(self, other)
elif isinstance(other, (datetime, date)) or is_datetime64_object(other):
elif is_datetime64_object(other) or PyDate_Check(other):
# PyDate_Check includes date, datetime
other = Timestamp(other)
else:
# This will end up returning NotImplemented back in __add__
Expand Down Expand Up @@ -227,7 +231,6 @@ cdef _wrap_timedelta_result(result):
"""
if PyDelta_Check(result):
# convert Timedelta back to a Tick
from pandas.tseries.offsets import delta_to_tick
return delta_to_tick(result)

return result
Expand Down Expand Up @@ -398,7 +401,7 @@ class ApplyTypeError(TypeError):
# ---------------------------------------------------------------------
# Base Classes

cdef class _BaseOffset:
cdef class BaseOffset:
"""
Base class for DateOffset methods that are not overridden by subclasses
and will (after pickle errors are resolved) go into a cdef class.
Expand Down Expand Up @@ -477,6 +480,9 @@ cdef class _BaseOffset:
return type(self)(n=1, normalize=self.normalize, **self.kwds)

def __add__(self, other):
if not isinstance(self, BaseOffset):
# cython semantics; this is __radd__
return other.__add__(self)
try:
return self.apply(other)
except ApplyTypeError:
Expand All @@ -488,6 +494,9 @@ cdef class _BaseOffset:
elif type(other) == type(self):
return type(self)(self.n - other.n, normalize=self.normalize,
**self.kwds)
elif not isinstance(self, BaseOffset):
# cython semantics, this is __rsub__
return (-other).__add__(self)
else: # pragma: no cover
return NotImplemented

Expand All @@ -506,6 +515,9 @@ cdef class _BaseOffset:
elif is_integer_object(other):
return type(self)(n=other * self.n, normalize=self.normalize,
**self.kwds)
elif not isinstance(self, BaseOffset):
# cython semantics, this is __rmul__
return other.__mul__(self)
return NotImplemented

def __neg__(self):
Expand Down Expand Up @@ -657,8 +669,8 @@ cdef class _BaseOffset:

# ------------------------------------------------------------------

# Staticmethod so we can call from _Tick.__init__, will be unnecessary
# once BaseOffset is a cdef class and is inherited by _Tick
# Staticmethod so we can call from Tick.__init__, will be unnecessary
# once BaseOffset is a cdef class and is inherited by Tick
@staticmethod
def _validate_n(n):
"""
Expand Down Expand Up @@ -758,24 +770,7 @@ cdef class _BaseOffset:
return self.n == 1


class BaseOffset(_BaseOffset):
# Here we add __rfoo__ methods that don't play well with cdef classes
def __rmul__(self, other):
return self.__mul__(other)

def __radd__(self, other):
return self.__add__(other)

def __rsub__(self, other):
return (-self).__add__(other)


cdef class _Tick(_BaseOffset):
"""
dummy class to mix into tseries.offsets.Tick so that in tslibs.period we
can do isinstance checks on _Tick and avoid importing tseries.offsets
"""

cdef class Tick(BaseOffset):
# ensure that reversed-ops with numpy scalars return NotImplemented
__array_priority__ = 1000
_adjust_dst = False
Expand All @@ -793,13 +788,25 @@ cdef class _Tick(_BaseOffset):
"Tick offset with `normalize=True` are not allowed."
)

@classmethod
def _from_name(cls, suffix=None):
    """
    Build an instance of ``cls`` with no arguments.

    Raises
    ------
    ValueError
        If a truthy ``suffix`` is supplied.
    """
    # default _from_name calls cls with no args
    if not suffix:
        return cls()
    raise ValueError(f"Bad freq suffix {suffix}")

def _repr_attrs(self) -> str:
    """
    Always return an empty string.
    """
    # Since cdef classes have no __dict__, we need to override
    return ""

@property
def delta(self):
return self.n * self._inc
from .timedeltas import Timedelta
return self.n * Timedelta(self._nanos_inc)

@property
def nanos(self) -> int64_t:
return self.delta.value
return self.n * self._nanos_inc

def is_on_offset(self, dt) -> bool:
    """
    Return True unconditionally; ``dt`` is not inspected.
    """
    return True
Expand Down Expand Up @@ -837,13 +844,63 @@ cdef class _Tick(_BaseOffset):
return self.delta.__gt__(other)

def __truediv__(self, other):
if not isinstance(self, _Tick):
if not isinstance(self, Tick):
# cython semantics mean the args are sometimes swapped
result = other.delta.__rtruediv__(self)
else:
result = self.delta.__truediv__(other)
return _wrap_timedelta_result(result)

def __add__(self, other):
    """
    Add ``other`` to this Tick.

    Two Ticks of the same type give a Tick of that type with summed ``n``;
    two Ticks of different types are combined through their ``delta`` and
    converted back with ``delta_to_tick``.  Anything else is delegated to
    ``self.apply``.

    Returns NotImplemented when ``apply`` raises ApplyTypeError, and
    re-raises OverflowError with a message naming both operands.
    """
    if not isinstance(self, Tick):
        # cython semantics; this is __radd__
        return other.__add__(self)

    if isinstance(other, Tick):
        if type(self) != type(other):
            return delta_to_tick(self.delta + other.delta)
        return type(self)(self.n + other.n)

    try:
        result = self.apply(other)
    except ApplyTypeError:
        # Includes pd.Period
        return NotImplemented
    except OverflowError as err:
        raise OverflowError(
            f"the add operation between {self} and {other} will overflow"
        ) from err
    return result

def apply(self, other):
    """
    Apply this Tick to ``other`` and return the result.

    Handles, in order: Timestamp-like objects, NaT, datetime64 / date /
    datetime objects, timedelta objects, and Ticks of the same type.

    Raises
    ------
    OverflowError
        If a Timestamp's ``__add__`` returns NotImplemented.
    ApplyTypeError
        If ``other`` is none of the handled types.
    """
    # Timestamp can handle tz and nano sec, thus no need to use apply_wraps
    if isinstance(other, ABCTimestamp):
        # GH#15126
        # in order to avoid a recursive
        # call of __add__ and __radd__ if there is
        # an exception, when we call using the + operator,
        # we directly call the known method
        added = other.__add__(self)
        if added is NotImplemented:
            raise OverflowError
        return added

    if other is NaT:
        return NaT

    if is_datetime64_object(other) or PyDate_Check(other):
        # PyDate_Check includes date, datetime
        from pandas import Timestamp
        return Timestamp(other) + self

    if PyDelta_Check(other):
        return other + self.delta

    if isinstance(other, type(self)):
        # TODO: this is reached in tests that specifically call apply,
        # but should not be reached "naturally" because __add__ should
        # catch this case first.
        return type(self)(self.n + other.n)

    raise ApplyTypeError(f"Unhandled type: {type(other).__name__}")

# --------------------------------------------------------------------
# Pickle Methods

Expand All @@ -855,6 +912,67 @@ cdef class _Tick(_BaseOffset):
self.normalize = False


cdef class Day(Tick):
    """
    Tick whose unit increment is one day (24 hours), held in nanoseconds.
    """
    # 24 hours * 3600 seconds * 1e9 nanoseconds
    _nanos_inc = 24 * 3600 * 1_000_000_000
    _prefix = "D"


cdef class Hour(Tick):
    """
    Tick whose unit increment is one hour, held in nanoseconds.
    """
    # 3600 seconds * 1e9 nanoseconds
    _nanos_inc = 3600 * 1_000_000_000
    _prefix = "H"


cdef class Minute(Tick):
    """
    Tick whose unit increment is one minute, held in nanoseconds.
    """
    # 60 seconds * 1e9 nanoseconds
    _nanos_inc = 60 * 1_000_000_000
    _prefix = "T"


cdef class Second(Tick):
    """
    Tick whose unit increment is one second, held in nanoseconds.
    """
    # 1e9 nanoseconds
    _nanos_inc = 1_000_000_000
    _prefix = "S"


cdef class Milli(Tick):
    """
    Tick whose unit increment is one millisecond, held in nanoseconds.
    """
    # 1e6 nanoseconds
    _nanos_inc = 1_000_000
    _prefix = "L"


cdef class Micro(Tick):
    """
    Tick whose unit increment is one microsecond, held in nanoseconds.
    """
    # 1e3 nanoseconds
    _nanos_inc = 1000
    _prefix = "U"


cdef class Nano(Tick):
    """
    Tick whose unit increment is one nanosecond.
    """
    _nanos_inc = 1
    _prefix = "N"


def delta_to_tick(delta: timedelta) -> Tick:
    """
    Convert a timedelta to the coarsest Tick that represents it exactly.

    Parameters
    ----------
    delta : datetime.timedelta
        A pd.Timedelta is also accepted; its ``nanoseconds`` attribute is
        then taken into account.

    Returns
    -------
    Tick
        One of Day, Hour, Minute, Second, Milli, Micro or Nano.
    """
    if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0:
        # nanoseconds only for pd.Timedelta
        if delta.seconds == 0:
            return Day(delta.days)
        else:
            seconds = delta.days * 86400 + delta.seconds
            # Divisibility is established by the modulo checks, so floor
            # division is exact and keeps ``n`` an int (true division would
            # hand the constructor a float), matching the sub-second branch.
            if seconds % 3600 == 0:
                return Hour(seconds // 3600)
            elif seconds % 60 == 0:
                return Minute(seconds // 60)
            else:
                return Second(seconds)
    else:
        nanos = delta_to_nanoseconds(delta)
        if nanos % 1_000_000 == 0:
            return Milli(nanos // 1_000_000)
        elif nanos % 1000 == 0:
            return Micro(nanos // 1000)
        else:  # pragma: no cover
            return Nano(nanos)


# --------------------------------------------------------------------


class BusinessMixin(BaseOffset):
"""
Mixin to business types to provide related functions.
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
period as libperiod,
)
from pandas._libs.tslibs.fields import isleapyear_arr
from pandas._libs.tslibs.offsets import Tick, delta_to_tick
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ,
IncompatibleFrequency,
Expand Down Expand Up @@ -45,7 +46,7 @@
import pandas.core.common as com

from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick, delta_to_tick
from pandas.tseries.offsets import DateOffset


def _field_accessor(name: str, alias: int, docstring=None):
Expand Down
8 changes: 5 additions & 3 deletions pandas/tests/tseries/offsets/test_ticks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import numpy as np
import pytest

from pandas._libs.tslibs.offsets import delta_to_tick

from pandas import Timedelta, Timestamp
import pandas._testing as tm

Expand All @@ -33,11 +35,11 @@ def test_apply_ticks():
def test_delta_to_tick():
delta = timedelta(3)

tick = offsets.delta_to_tick(delta)
tick = delta_to_tick(delta)
assert tick == offsets.Day(3)

td = Timedelta(nanoseconds=5)
tick = offsets.delta_to_tick(td)
tick = delta_to_tick(td)
assert tick == Nano(5)


Expand Down Expand Up @@ -234,7 +236,7 @@ def test_tick_division(cls):
assert not isinstance(result, cls)
assert result.delta == off.delta / 1000

if cls._inc < Timedelta(seconds=1):
if cls._nanos_inc < Timedelta(seconds=1).value:
# Case where we end up with a bigger class
result = off / 0.001
assert isinstance(result, offsets.Tick)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def to_offset(freq) -> Optional[DateOffset]:
)
stride = int(stride)
offset = _get_offset(name)
offset = offset * int(np.fabs(stride) * stride_sign) # type: ignore
offset = offset * int(np.fabs(stride) * stride_sign)
if delta is None:
delta = offset
else:
Expand Down Expand Up @@ -218,7 +218,7 @@ def _get_offset(name: str) -> DateOffset:
klass = prefix_mapping[split[0]]
# handles case where there's no suffix (and will TypeError if too
# many '-')
offset = klass._from_name(*split[1:]) # type: ignore
offset = klass._from_name(*split[1:])
except (ValueError, TypeError, KeyError) as err:
# bad prefix or suffix
raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name)) from err
Expand Down
Loading