-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Implement mul, floordiv, mod, divmod, and reversed directly in TimedeltaArray #23885
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2f75de9
749cfed
1515e21
d7c727c
8ae9059
2577ee6
62d2018
8f7b7b4
6d162d8
ef25750
bc5a3d6
700c5a2
d7591b6
bdb27ec
87f36ac
987eecd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,6 @@ | |
from __future__ import division | ||
|
||
from datetime import timedelta | ||
import operator | ||
import warnings | ||
|
||
import numpy as np | ||
|
@@ -17,13 +16,12 @@ | |
|
||
from pandas.core.dtypes.common import ( | ||
_TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, | ||
is_integer_dtype, is_list_like, is_object_dtype, is_string_dtype, | ||
is_timedelta64_dtype) | ||
is_integer_dtype, is_list_like, is_object_dtype, is_scalar, | ||
is_string_dtype, is_timedelta64_dtype) | ||
from pandas.core.dtypes.generic import ( | ||
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) | ||
from pandas.core.dtypes.missing import isna | ||
|
||
from pandas.core import ops | ||
from pandas.core.algorithms import checked_add_with_arr, unique1d | ||
import pandas.core.common as com | ||
|
||
|
@@ -106,29 +104,6 @@ def wrapper(self, other): | |
return compat.set_function_name(wrapper, opname, cls) | ||
|
||
|
||
def _wrap_tdi_op(op): | ||
""" | ||
Instead of re-implementing multiplication/division etc operations | ||
in the Array class, for now we dispatch to the TimedeltaIndex | ||
implementations. | ||
""" | ||
# TODO: implement directly here and wrap in TimedeltaIndex, instead of | ||
# the other way around | ||
def method(self, other): | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
from pandas import TimedeltaIndex | ||
obj = TimedeltaIndex(self) | ||
result = op(obj, other) | ||
if is_timedelta64_dtype(result): | ||
return type(self)(result) | ||
return np.array(result) | ||
|
||
method.__name__ = '__{name}__'.format(name=op.__name__) | ||
return method | ||
|
||
|
||
class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): | ||
_typ = "timedeltaarray" | ||
__array_priority__ = 1000 | ||
|
@@ -332,37 +307,41 @@ def _addsub_offset_array(self, other, op): | |
raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}" | ||
.format(cls=type(self).__name__)) | ||
|
||
def _evaluate_with_timedelta_like(self, other, op): | ||
if isinstance(other, ABCSeries): | ||
# GH#19042 | ||
def __mul__(self, other): | ||
other = lib.item_from_zerodim(other) | ||
|
||
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
opstr = '__{opname}__'.format(opname=op.__name__).replace('__r', '__') | ||
# allow division by a timedelta | ||
if opstr in ['__div__', '__truediv__', '__floordiv__']: | ||
if _is_convertible_to_td(other): | ||
other = Timedelta(other) | ||
if isna(other): | ||
raise NotImplementedError( | ||
"division by pd.NaT not implemented") | ||
|
||
i8 = self.asi8 | ||
left, right = i8, other.value | ||
|
||
if opstr in ['__floordiv__']: | ||
result = op(left, right) | ||
else: | ||
result = op(left, np.float64(right)) | ||
result = self._maybe_mask_results(result, fill_value=None, | ||
convert='float64') | ||
return result | ||
if is_scalar(other): | ||
# numpy will accept float and int, raise TypeError for others | ||
result = self._data * other | ||
freq = None | ||
if self.freq is not None and not isna(other): | ||
freq = self.freq * other | ||
return type(self)(result, freq=freq) | ||
|
||
if not hasattr(other, "dtype"): | ||
# list, tuple | ||
other = np.array(other) | ||
if len(other) != len(self) and not is_timedelta64_dtype(other): | ||
# Exclude timedelta64 here so we correctly raise TypeError | ||
# for that instead of ValueError | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lots of this implementation matches how this is done in Timedelta; too bad it can't easily be shared. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After DTA/TDA (DatetimeArray/TimedeltaArray) are done I might look at this more seriously. |
||
raise ValueError("Cannot multiply with unequal lengths") | ||
|
||
if is_object_dtype(other): | ||
# this multiplication will succeed only if all elements of other | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It might be useful to put this in a method that can be shared across multiple ops. |
||
# are int or float scalars, so we will end up with | ||
# timedelta64[ns]-dtyped result | ||
result = [self[n] * other[n] for n in range(len(self))] | ||
result = np.array(result) | ||
return type(self)(result) | ||
|
||
return NotImplemented | ||
# numpy will accept float or int dtype, raise TypeError for others | ||
result = self._data * other | ||
return type(self)(result) | ||
|
||
__mul__ = _wrap_tdi_op(operator.mul) | ||
__rmul__ = __mul__ | ||
__floordiv__ = _wrap_tdi_op(operator.floordiv) | ||
__rfloordiv__ = _wrap_tdi_op(ops.rfloordiv) | ||
|
||
def __truediv__(self, other): | ||
# timedelta / X is well-defined for timedelta-like or numeric X | ||
|
@@ -464,6 +443,165 @@ def __rtruediv__(self, other): | |
__div__ = __truediv__ | ||
__rdiv__ = __rtruediv__ | ||
|
||
def __floordiv__(self, other): | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if is_scalar(other): | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
if other is NaT: | ||
# treat this specifically as timedelta-NaT | ||
result = np.empty(self.shape, dtype=np.float64) | ||
result.fill(np.nan) | ||
return result | ||
|
||
# dispatch to Timedelta implementation | ||
result = other.__rfloordiv__(self._data) | ||
return result | ||
|
||
# at this point we should only have numeric scalars; anything | ||
# else will raise | ||
result = self.asi8 // other | ||
result[self._isnan] = iNaT | ||
freq = None | ||
if self.freq is not None: | ||
# Note: freq gets division, not floor-division | ||
freq = self.freq / other | ||
return type(self)(result.view('m8[ns]'), freq=freq) | ||
|
||
if not hasattr(other, "dtype"): | ||
# list, tuple | ||
other = np.array(other) | ||
if len(other) != len(self): | ||
raise ValueError("Cannot divide with unequal lengths") | ||
|
||
elif is_timedelta64_dtype(other): | ||
other = type(self)(other) | ||
|
||
# numpy timedelta64 does not natively support floordiv, so operate | ||
# on the i8 values | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks an awful lot like _maybe_mask_missing (in Index). |
||
result = self.asi8 // other.asi8 | ||
mask = self._isnan | other._isnan | ||
if mask.any(): | ||
result = result.astype(np.int64) | ||
result[mask] = np.nan | ||
return result | ||
|
||
elif is_object_dtype(other): | ||
result = [self[n] // other[n] for n in range(len(self))] | ||
result = np.array(result) | ||
if lib.infer_dtype(result) == 'timedelta': | ||
result, _ = sequence_to_td64ns(result) | ||
return type(self)(result) | ||
return result | ||
|
||
elif is_integer_dtype(other) or is_float_dtype(other): | ||
result = self._data // other | ||
return type(self)(result) | ||
|
||
else: | ||
dtype = getattr(other, "dtype", type(other).__name__) | ||
raise TypeError("Cannot divide {typ} by {cls}" | ||
.format(typ=dtype, cls=type(self).__name__)) | ||
|
||
def __rfloordiv__(self, other): | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this share code with floordiv? |
||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if is_scalar(other): | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
if other is NaT: | ||
# treat this specifically as timedelta-NaT | ||
result = np.empty(self.shape, dtype=np.float64) | ||
result.fill(np.nan) | ||
return result | ||
|
||
# dispatch to Timedelta implementation | ||
result = other.__floordiv__(self._data) | ||
return result | ||
|
||
raise TypeError("Cannot divide {typ} by {cls}" | ||
.format(typ=type(other).__name__, | ||
cls=type(self).__name__)) | ||
|
||
if not hasattr(other, "dtype"): | ||
# list, tuple | ||
other = np.array(other) | ||
if len(other) != len(self): | ||
raise ValueError("Cannot divide with unequal lengths") | ||
|
||
elif is_timedelta64_dtype(other): | ||
other = type(self)(other) | ||
|
||
# numpy timedelta64 does not natively support floordiv, so operate | ||
# on the i8 values | ||
result = other.asi8 // self.asi8 | ||
mask = self._isnan | other._isnan | ||
if mask.any(): | ||
result = result.astype(np.int64) | ||
result[mask] = np.nan | ||
return result | ||
|
||
elif is_object_dtype(other): | ||
result = [other[n] // self[n] for n in range(len(self))] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there any nice way to remove some of this duplication (floordiv / div), maybe with some helper functions? |
||
result = np.array(result) | ||
return result | ||
|
||
else: | ||
dtype = getattr(other, "dtype", type(other).__name__) | ||
raise TypeError("Cannot divide {typ} by {cls}" | ||
.format(typ=dtype, cls=type(self).__name__)) | ||
|
||
def __mod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
return self - (self // other) * other | ||
|
||
def __rmod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this share with `__mod__`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not really. For div and divmod, either timedeltas or numerics are valid. For reversed ops, only timedeltas are valid. |
||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
return other - (other // self) * self | ||
|
||
def __divmod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
|
||
res1 = self // other | ||
res2 = self - res1 * other | ||
return res1, res2 | ||
|
||
def __rdivmod__(self, other): | ||
# Note: This is a naive implementation, can likely be optimized | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this share with `__divmod__`? |
||
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): | ||
return NotImplemented | ||
|
||
other = lib.item_from_zerodim(other) | ||
if isinstance(other, (timedelta, np.timedelta64, Tick)): | ||
other = Timedelta(other) | ||
|
||
res1 = other // self | ||
res2 = other - res1 * self | ||
return res1, res2 | ||
|
||
# Note: TimedeltaIndex overrides this in call to cls._add_numeric_methods | ||
def __neg__(self): | ||
if self.freq is not None: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -573,6 +573,12 @@ def wrap_arithmetic_op(self, other, result): | |
if result is NotImplemented: | ||
return NotImplemented | ||
|
||
if isinstance(result, tuple): | ||
# divmod, rdivmod | ||
assert len(result) == 2 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would this bubble up in any way? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you mean the whole [inline code reference lost in extraction]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Referring just to the [inline code reference lost in extraction]. |
||
return (wrap_arithmetic_op(self, other, result[0]), | ||
wrap_arithmetic_op(self, other, result[1])) | ||
|
||
if not isinstance(result, Index): | ||
# Index.__new__ will choose appropriate subclass for dtype | ||
result = Index(result) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so you don't do this everywhere?
maybe add a function that you call to avoid repeating code
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I opened #23853 for exactly this reason. It merits a dedicated PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, but now you are missing some code here on several of the operators (item_from_zerodim), so it may be better to fix it now.