Skip to content

Implement mul, floordiv, mod, divmod, and reversed directly in TimedeltaArray #23885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Dec 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
2f75de9
implement floordiv, rfloordiv, mod, rmod, divmod, rdivmod directly in …
jbrockmendel Nov 23, 2018
749cfed
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 23, 2018
1515e21
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 24, 2018
d7c727c
define __mul__, __rmul__ directly in TimedeltaArray
jbrockmendel Nov 24, 2018
8ae9059
error handling
jbrockmendel Nov 24, 2018
2577ee6
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 24, 2018
62d2018
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 24, 2018
8f7b7b4
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 28, 2018
6d162d8
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 29, 2018
ef25750
remove no-longer-necessary
jbrockmendel Nov 29, 2018
bc5a3d6
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 29, 2018
700c5a2
update error message
jbrockmendel Nov 29, 2018
d7591b6
remove unused import
jbrockmendel Nov 29, 2018
bdb27ec
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Dec 2, 2018
87f36ac
item_from_zerodim
jbrockmendel Dec 3, 2018
987eecd
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Dec 3, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
244 changes: 191 additions & 53 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from __future__ import division

from datetime import timedelta
import operator
import warnings

import numpy as np
Expand All @@ -17,13 +16,12 @@

from pandas.core.dtypes.common import (
_TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_string_dtype,
is_timedelta64_dtype)
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
from pandas.core.dtypes.missing import isna

from pandas.core import ops
from pandas.core.algorithms import checked_add_with_arr, unique1d
import pandas.core.common as com

Expand Down Expand Up @@ -106,29 +104,6 @@ def wrapper(self, other):
return compat.set_function_name(wrapper, opname, cls)


def _wrap_tdi_op(op):
"""
Instead of re-implementing multiplication/division etc operations
in the Array class, for now we dispatch to the TimedeltaIndex
implementations.
"""
# TODO: implement directly here and wrap in TimedeltaIndex, instead of
# the other way around
def method(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

from pandas import TimedeltaIndex
obj = TimedeltaIndex(self)
result = op(obj, other)
if is_timedelta64_dtype(result):
return type(self)(result)
return np.array(result)

method.__name__ = '__{name}__'.format(name=op.__name__)
return method


class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
_typ = "timedeltaarray"
__array_priority__ = 1000
Expand Down Expand Up @@ -332,37 +307,41 @@ def _addsub_offset_array(self, other, op):
raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}"
.format(cls=type(self).__name__))

def _evaluate_with_timedelta_like(self, other, op):
if isinstance(other, ABCSeries):
# GH#19042
def __mul__(self, other):
other = lib.item_from_zerodim(other)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so you don't do this everywhere?

maybe add a function that you call to avoid repeating code

def _prepare_other(other):
   other = lib.item_from_zerodim(other)
   if is_list_like(other) and not hasattr(other, "dtype"):
            # list, tuple
            other = np.array(other)
   return other

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened #23853 for exactly this reason. It merits a dedicated PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, but now you are missing some code here on several of the operators (from_zero_dim), so maybe better to fix now.


if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
return NotImplemented

opstr = '__{opname}__'.format(opname=op.__name__).replace('__r', '__')
# allow division by a timedelta
if opstr in ['__div__', '__truediv__', '__floordiv__']:
if _is_convertible_to_td(other):
other = Timedelta(other)
if isna(other):
raise NotImplementedError(
"division by pd.NaT not implemented")

i8 = self.asi8
left, right = i8, other.value

if opstr in ['__floordiv__']:
result = op(left, right)
else:
result = op(left, np.float64(right))
result = self._maybe_mask_results(result, fill_value=None,
convert='float64')
return result
if is_scalar(other):
# numpy will accept float and int, raise TypeError for others
result = self._data * other
freq = None
if self.freq is not None and not isna(other):
freq = self.freq * other
return type(self)(result, freq=freq)

if not hasattr(other, "dtype"):
# list, tuple
other = np.array(other)
if len(other) != len(self) and not is_timedelta64_dtype(other):
# Exclude timedelta64 here so we correctly raise TypeError
# for that instead of ValueError
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lots of this impl matches how this is done in Timedelta. too bad can't easily share.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After DTA/TDA are done I might look at this more seriously.

raise ValueError("Cannot multiply with unequal lengths")

if is_object_dtype(other):
# this multiplication will succeed only if all elements of other
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might be useful to put this in a method that can be shared across multiple ops

# are int or float scalars, so we will end up with
# timedelta64[ns]-dtyped result
result = [self[n] * other[n] for n in range(len(self))]
result = np.array(result)
return type(self)(result)

return NotImplemented
# numpy will accept float or int dtype, raise TypeError for others
result = self._data * other
return type(self)(result)

__mul__ = _wrap_tdi_op(operator.mul)
__rmul__ = __mul__
__floordiv__ = _wrap_tdi_op(operator.floordiv)
__rfloordiv__ = _wrap_tdi_op(ops.rfloordiv)

def __truediv__(self, other):
# timedelta / X is well-defined for timedelta-like or numeric X
Expand Down Expand Up @@ -464,6 +443,165 @@ def __rtruediv__(self, other):
__div__ = __truediv__
__rdiv__ = __rtruediv__

def __floordiv__(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if is_scalar(other):
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# treat this specifically as timedelta-NaT
result = np.empty(self.shape, dtype=np.float64)
result.fill(np.nan)
return result

# dispatch to Timedelta implementation
result = other.__rfloordiv__(self._data)
return result

# at this point we should only have numeric scalars; anything
# else will raise
result = self.asi8 // other
result[self._isnan] = iNaT
freq = None
if self.freq is not None:
# Note: freq gets division, not floor-division
freq = self.freq / other
return type(self)(result.view('m8[ns]'), freq=freq)

if not hasattr(other, "dtype"):
# list, tuple
other = np.array(other)
if len(other) != len(self):
raise ValueError("Cannot divide with unequal lengths")

elif is_timedelta64_dtype(other):
other = type(self)(other)

# numpy timedelta64 does not natively support floordiv, so operate
# on the i8 values
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks an awful lot like _maybe_mask_missing (in Index)

result = self.asi8 // other.asi8
mask = self._isnan | other._isnan
if mask.any():
result = result.astype(np.int64)
result[mask] = np.nan
return result

elif is_object_dtype(other):
result = [self[n] // other[n] for n in range(len(self))]
result = np.array(result)
if lib.infer_dtype(result) == 'timedelta':
result, _ = sequence_to_td64ns(result)
return type(self)(result)
return result

elif is_integer_dtype(other) or is_float_dtype(other):
result = self._data // other
return type(self)(result)

else:
dtype = getattr(other, "dtype", type(other).__name__)
raise TypeError("Cannot divide {typ} by {cls}"
.format(typ=dtype, cls=type(self).__name__))

def __rfloordiv__(self, other):
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this share with floordiv?

return NotImplemented

other = lib.item_from_zerodim(other)
if is_scalar(other):
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# treat this specifically as timedelta-NaT
result = np.empty(self.shape, dtype=np.float64)
result.fill(np.nan)
return result

# dispatch to Timedelta implementation
result = other.__floordiv__(self._data)
return result

raise TypeError("Cannot divide {typ} by {cls}"
.format(typ=type(other).__name__,
cls=type(self).__name__))

if not hasattr(other, "dtype"):
# list, tuple
other = np.array(other)
if len(other) != len(self):
raise ValueError("Cannot divide with unequal lengths")

elif is_timedelta64_dtype(other):
other = type(self)(other)

# numpy timedelta64 does not natively support floordiv, so operate
# on the i8 values
result = other.asi8 // self.asi8
mask = self._isnan | other._isnan
if mask.any():
result = result.astype(np.int64)
result[mask] = np.nan
return result

elif is_object_dtype(other):
result = [other[n] // self[n] for n in range(len(self))]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any nice way to remove some of this duplication (floordir / div), maybe with some helper functions.

result = np.array(result)
return result

else:
dtype = getattr(other, "dtype", type(other).__name__)
raise TypeError("Cannot divide {typ} by {cls}"
.format(typ=dtype, cls=type(self).__name__))

def __mod__(self, other):
# Note: This is a naive implementation, can likely be optimized
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
return self - (self // other) * other

def __rmod__(self, other):
# Note: This is a naive implementation, can likely be optimized
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this share with __mod__?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really. For div and divmod either timedeltas or numeric are valid. For reversed ops only timedeltas are valid.


other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
return other - (other // self) * self

def __divmod__(self, other):
# Note: This is a naive implementation, can likely be optimized
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)

res1 = self // other
res2 = self - res1 * other
return res1, res2

def __rdivmod__(self, other):
# Note: This is a naive implementation, can likely be optimized
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this share with __divmod__?

if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

other = lib.item_from_zerodim(other)
if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)

res1 = other // self
res2 = other - res1 * self
return res1, res2

# Note: TimedeltaIndex overrides this in call to cls._add_numeric_methods
def __neg__(self):
if self.freq is not None:
Expand Down
21 changes: 10 additions & 11 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5031,23 +5031,22 @@ def _add_numeric_methods_binary(cls):
cls.__radd__ = _make_arithmetic_op(ops.radd, cls)
cls.__sub__ = _make_arithmetic_op(operator.sub, cls)
cls.__rsub__ = _make_arithmetic_op(ops.rsub, cls)
cls.__mul__ = _make_arithmetic_op(operator.mul, cls)
cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls)
cls.__rpow__ = _make_arithmetic_op(ops.rpow, cls)
cls.__pow__ = _make_arithmetic_op(operator.pow, cls)

cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls)
cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls)
if not compat.PY3:
cls.__div__ = _make_arithmetic_op(operator.div, cls)
cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls)

# TODO: rmod? rdivmod?
cls.__mod__ = _make_arithmetic_op(operator.mod, cls)
cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls)
cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls)

if not issubclass(cls, ABCTimedeltaIndex):
# GH#23829 TimedeltaIndex defines these directly
cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls)
cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls)
if not compat.PY3:
cls.__div__ = _make_arithmetic_op(operator.div, cls)
cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls)

cls.__divmod__ = _make_arithmetic_op(divmod, cls)
cls.__mul__ = _make_arithmetic_op(operator.mul, cls)
cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls)

@classmethod
def _add_numeric_methods_unary(cls):
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,12 @@ def wrap_arithmetic_op(self, other, result):
if result is NotImplemented:
return NotImplemented

if isinstance(result, tuple):
# divmod, rdivmod
assert len(result) == 2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this bubble up in any way?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean the whole isinstance block or the len-2 assertion? The former is necessary, the latter is just protecting against me being a dummy.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Referring just to the len-2. While I appreciate dummy-protection, I would still want to know if that assert would surface for end-users, that's all.

return (wrap_arithmetic_op(self, other, result[0]),
wrap_arithmetic_op(self, other, result[1]))

if not isinstance(result, Index):
# Index.__new__ will choose appropriate subclass for dtype
result = Index(result)
Expand Down
37 changes: 27 additions & 10 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,24 @@
from pandas.tseries.frequencies import to_offset


class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, Int64Index):
def _make_wrapped_arith_op(opname):

meth = getattr(TimedeltaArray, opname)

def method(self, other):
oth = other
if isinstance(other, Index):
oth = other._data

result = meth(self, oth)
return wrap_arithmetic_op(self, other, result)

method.__name__ = opname
return method


class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin,
dtl.TimelikeOps, Int64Index):
"""
Immutable ndarray of timedelta64 data, represented internally as int64, and
which can be boxed to timedelta objects
Expand Down Expand Up @@ -203,10 +220,6 @@ def _maybe_update_attributes(self, attrs):
attrs['freq'] = 'infer'
return attrs

def _evaluate_with_timedelta_like(self, other, op):
result = TimedeltaArray._evaluate_with_timedelta_like(self, other, op)
return wrap_arithmetic_op(self, other, result)

# -------------------------------------------------------------------
# Rendering Methods

Expand All @@ -224,10 +237,14 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
# -------------------------------------------------------------------
# Wrapping TimedeltaArray

__mul__ = Index.__mul__
__rmul__ = Index.__rmul__
__floordiv__ = Index.__floordiv__
__rfloordiv__ = Index.__rfloordiv__
__mul__ = _make_wrapped_arith_op("__mul__")
__rmul__ = _make_wrapped_arith_op("__rmul__")
__floordiv__ = _make_wrapped_arith_op("__floordiv__")
__rfloordiv__ = _make_wrapped_arith_op("__rfloordiv__")
__mod__ = _make_wrapped_arith_op("__mod__")
__rmod__ = _make_wrapped_arith_op("__rmod__")
__divmod__ = _make_wrapped_arith_op("__divmod__")
__rdivmod__ = _make_wrapped_arith_op("__rdivmod__")

days = wrap_field_accessor(TimedeltaArray.days)
seconds = wrap_field_accessor(TimedeltaArray.seconds)
Expand Down Expand Up @@ -658,7 +675,7 @@ def delete(self, loc):


TimedeltaIndex._add_comparison_ops()
TimedeltaIndex._add_numeric_methods()
TimedeltaIndex._add_numeric_methods_unary()
TimedeltaIndex._add_logical_methods_disabled()
TimedeltaIndex._add_datetimelike_methods()

Expand Down
Loading