Skip to content

Implement mul, floordiv, mod, divmod, and reversed directly in TimedeltaArray #23885

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Dec 3, 2018
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
2f75de9
implement floordiv, rfloordiv, mod, rmod, divmod, rdivmod directly in …
jbrockmendel Nov 23, 2018
749cfed
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 23, 2018
1515e21
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 24, 2018
d7c727c
define __mul__, __rmul__ directly in TimedeltaArray
jbrockmendel Nov 24, 2018
8ae9059
error handling
jbrockmendel Nov 24, 2018
2577ee6
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 24, 2018
62d2018
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 24, 2018
8f7b7b4
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 28, 2018
6d162d8
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 29, 2018
ef25750
remove no-longer-necessary
jbrockmendel Nov 29, 2018
bc5a3d6
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Nov 29, 2018
700c5a2
update error message
jbrockmendel Nov 29, 2018
d7591b6
remove unused import
jbrockmendel Nov 29, 2018
bdb27ec
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Dec 2, 2018
87f36ac
item_from_zerodim
jbrockmendel Dec 3, 2018
987eecd
Merge branch 'master' of https://github.com/pandas-dev/pandas into ht…
jbrockmendel Dec 3, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 199 additions & 8 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas._libs import tslibs
from pandas._libs import algos, lib, tslibs
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import (
Expand All @@ -17,14 +17,13 @@

from pandas.core.dtypes.common import (
_TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
is_integer_dtype, is_list_like, is_object_dtype, is_string_dtype,
is_timedelta64_dtype)
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
is_string_dtype, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
from pandas.core.dtypes.missing import isna

from pandas.core import ops
from pandas.core.algorithms import checked_add_with_arr
from pandas.core.algorithms import checked_add_with_arr, unique1d
import pandas.core.common as com

from pandas.tseries.frequencies import to_offset
Expand Down Expand Up @@ -227,6 +226,18 @@ def _validate_fill_value(self, fill_value):
"Got '{got}'.".format(got=fill_value))
return fill_value

@property
def is_monotonic_increasing(self):
    """
    Return the first flag reported by ``algos.is_monotonic`` for the i8
    values of this array (evaluated with ``timelike=True``).
    """
    flags = algos.is_monotonic(self.asi8, timelike=True)
    return flags[0]

@property
def is_monotonic_decreasing(self):
    """
    Return the second flag reported by ``algos.is_monotonic`` for the i8
    values of this array (evaluated with ``timelike=True``).
    """
    flags = algos.is_monotonic(self.asi8, timelike=True)
    return flags[1]

@property
def is_unique(self):
    """
    Return True when the number of distinct i8 values (per ``unique1d``)
    equals the length of this array.
    """
    n_distinct = len(unique1d(self.asi8))
    return n_distinct == len(self)

# ----------------------------------------------------------------
# Arithmetic Methods

Expand Down Expand Up @@ -322,15 +333,195 @@ def _evaluate_with_timedelta_like(self, other, op):

return NotImplemented

__mul__ = _wrap_tdi_op(operator.mul)
def __mul__(self, other):
other = lib.item_from_zerodim(other)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so you don't do this everywhere?

maybe add a function that you call to avoid repeating code

def _prepare_other(other):
   other = lib.item_from_zerodim(other)
   if is_list_like(other) and not hasattr(other, "dtype"):
            # list, tuple
            other = np.array(other)
   return other

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened #23853 for exactly this reason. It merits a dedicated PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, but now you are missing some code here on several of the operators (item_from_zerodim), so maybe better to fix now.


if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
return NotImplemented

if is_scalar(other):
# numpy will accept float and int, raise TypeError for others
result = self._data * other
freq = None
if self.freq is not None and not isna(other):
freq = self.freq * other
return type(self)(result, freq=freq)

if not hasattr(other, "dtype"):
# list, tuple
other = np.array(other)
if len(other) != len(self) and not is_timedelta64_dtype(other):
# Exclude timedelta64 here so we correctly raise TypeError
# for that instead of ValueError
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lots of this impl matches how this is done in Timedelta. too bad can't easily share.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After DTA/TDA are done I might look at this more seriously.

raise ValueError("Cannot multiply with unequal lengths")

if is_object_dtype(other):
# this multiplication will succeed only if all elements of other
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might be useful to put this in a method that can be shared across multiple ops

# are int or float scalars, so we will end up with
# timedelta64[ns]-dtyped result
result = [self[n] * other[n] for n in range(len(self))]
result = np.array(result)
return type(self)(result)

# numpy will accept float or int dtype, raise TypeError for others
result = self._data * other
return type(self)(result)

__rmul__ = __mul__
__truediv__ = _wrap_tdi_op(operator.truediv)
__floordiv__ = _wrap_tdi_op(operator.floordiv)
__rfloordiv__ = _wrap_tdi_op(ops.rfloordiv)

if compat.PY2:
__div__ = __truediv__

def __floordiv__(self, other):
    """
    Floor-divide this array by ``other`` (``self // other``).

    Parameters
    ----------
    other : scalar, list, tuple or object with a ``dtype``
        Timedelta-like scalars (``timedelta``, ``np.timedelta64``, ``Tick``)
        are converted to ``Timedelta``; any other scalar is handed to the
        integer floor-division of the i8 values. Sequences without a
        ``dtype`` are converted with ``np.array``.

    Returns
    -------
    result
        * timedelta-like scalar: whatever ``Timedelta.__rfloordiv__``
          returns, or a float64 all-NaN ndarray when the scalar is NaT.
        * other scalar: a new ``type(self)`` instance.
        * timedelta64-dtype array: ndarray of i8 quotients, cast to float64
          with NaN wherever either operand is NaT.
        * object-dtype array: ``type(self)`` when the element-wise results
          infer as timedelta, otherwise an ndarray.
        * integer/float-dtype array: a new ``type(self)`` instance.
        ``NotImplemented`` is returned for Series/DataFrame/Index operands.

    Raises
    ------
    ValueError
        If an array-like ``other`` has a different length than ``self``.
    TypeError
        If ``other`` has a dtype that none of the branches handle.
    """
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    # Unwrap zero-dimensional ndarrays so they take the scalar path below
    # instead of failing on ``len(other)``.
    other = lib.item_from_zerodim(other)

    if is_scalar(other):
        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
            if other is NaT:
                # treat this specifically as timedelta-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # dispatch to Timedelta implementation
            result = other.__rfloordiv__(self._data)
            return result

        # at this point we should only have numeric scalars; anything
        # else will raise
        result = self.asi8 // other
        result[self._isnan] = iNaT
        freq = None
        if self.freq is not None:
            # Note: freq gets division, not floor-division
            freq = self.freq / other
        return type(self)(result.view('m8[ns]'), freq=freq)

    if not hasattr(other, "dtype"):
        # list, tuple
        other = np.array(other)
    if len(other) != len(self):
        raise ValueError("Cannot divide with unequal lengths")

    elif is_timedelta64_dtype(other):
        other = type(self)(other)

        # numpy timedelta64 does not natively support floordiv, so operate
        # on the i8 values
        # NOTE(review): a reviewer pointed out that this masking resembles
        # _maybe_mask_missing (in Index).
        result = self.asi8 // other.asi8
        mask = self._isnan | other._isnan
        if mask.any():
            # NaN cannot be stored in an integer array, so switch to
            # float64 (same dtype the scalar-NaT branch returns).
            result = result.astype(np.float64)
            result[mask] = np.nan
        return result

    elif is_object_dtype(other):
        result = [self[n] // other[n] for n in range(len(self))]
        result = np.array(result)
        if lib.infer_dtype(result) == 'timedelta':
            result, _ = sequence_to_td64ns(result)
            return type(self)(result)
        return result

    elif is_integer_dtype(other) or is_float_dtype(other):
        result = self._data // other
        return type(self)(result)

    else:
        dtype = getattr(other, "dtype", type(other).__name__)
        raise TypeError("Cannot divide {typ} by {cls}"
                        .format(typ=dtype, cls=type(self).__name__))

def __rfloordiv__(self, other):
    """
    Reflected floor-division (``other // self``).

    Parameters
    ----------
    other : scalar, list, tuple or object with a ``dtype``
        Only timedelta-like scalars (``timedelta``, ``np.timedelta64``,
        ``Tick``) are accepted as scalars; sequences without a ``dtype``
        are converted with ``np.array``.

    Returns
    -------
    result
        * timedelta-like scalar: whatever ``Timedelta.__floordiv__``
          returns, or a float64 all-NaN ndarray when the scalar is NaT.
        * timedelta64-dtype array: ndarray of i8 quotients, cast to float64
          with NaN wherever either operand is NaT.
        * object-dtype array: ndarray of the element-wise results.
        ``NotImplemented`` is returned for Series/DataFrame/Index operands.

    Raises
    ------
    ValueError
        If an array-like ``other`` has a different length than ``self``.
    TypeError
        If ``other`` is a non-timedelta scalar or has an unsupported dtype.
    """
    # NOTE(review): a reviewer asked whether this can share code with
    # __floordiv__.
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    # Unwrap zero-dimensional ndarrays so they take the scalar path below
    # instead of failing on ``len(other)``.
    other = lib.item_from_zerodim(other)

    if is_scalar(other):
        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            other = Timedelta(other)
            if other is NaT:
                # treat this specifically as timedelta-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # dispatch to Timedelta implementation
            result = other.__floordiv__(self._data)
            return result

        raise TypeError("Cannot divide {typ} by {cls}"
                        .format(typ=type(other).__name__,
                                cls=type(self).__name__))

    if not hasattr(other, "dtype"):
        # list, tuple
        other = np.array(other)
    if len(other) != len(self):
        raise ValueError("Cannot divide with unequal lengths")

    elif is_timedelta64_dtype(other):
        other = type(self)(other)

        # numpy timedelta64 does not natively support floordiv, so operate
        # on the i8 values
        result = other.asi8 // self.asi8
        mask = self._isnan | other._isnan
        if mask.any():
            # NaN cannot be stored in an integer array, so switch to
            # float64 (same dtype the scalar-NaT branch returns).
            result = result.astype(np.float64)
            result[mask] = np.nan
        return result

    elif is_object_dtype(other):
        # NOTE(review): a reviewer asked for helper functions to remove the
        # duplication between the floordiv / div implementations.
        result = [other[n] // self[n] for n in range(len(self))]
        result = np.array(result)
        return result

    else:
        dtype = getattr(other, "dtype", type(other).__name__)
        raise TypeError("Cannot divide {typ} by {cls}"
                        .format(typ=dtype, cls=type(self).__name__))

def __mod__(self, other):
    """
    Compute ``self % other`` as ``self - (self // other) * other``.

    Returns ``NotImplemented`` for Series/DataFrame/Index operands.
    Timedelta-like scalars (``timedelta``, ``np.timedelta64``, ``Tick``)
    are converted to ``Timedelta`` before the arithmetic.
    """
    # Note: This is a naive implementation, can likely be optimized
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    # Unwrap zero-dimensional ndarrays so a 0-d timedelta64 is recognized
    # by the isinstance check below.
    other = lib.item_from_zerodim(other)
    if isinstance(other, (timedelta, np.timedelta64, Tick)):
        other = Timedelta(other)
    return self - (self // other) * other

def __rmod__(self, other):
    """
    Compute ``other % self`` as ``other - (other // self) * self``.

    Returns ``NotImplemented`` for Series/DataFrame/Index operands.
    Timedelta-like scalars (``timedelta``, ``np.timedelta64``, ``Tick``)
    are converted to ``Timedelta`` before the arithmetic.
    """
    # Note: This is a naive implementation, can likely be optimized
    # NOTE(review): deliberately not shared with __mod__. Per the author,
    # for div and divmod either timedeltas or numerics are valid operands,
    # while for the reversed ops only timedeltas are valid.
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    # Unwrap zero-dimensional ndarrays so a 0-d timedelta64 is recognized
    # by the isinstance check below.
    other = lib.item_from_zerodim(other)
    if isinstance(other, (timedelta, np.timedelta64, Tick)):
        other = Timedelta(other)
    return other - (other // self) * self

def __divmod__(self, other):
    """
    Compute ``divmod(self, other)``.

    Returns
    -------
    tuple
        ``(self // other, self - (self // other) * other)``, or
        ``NotImplemented`` for Series/DataFrame/Index operands.
    """
    # Note: This is a naive implementation, can likely be optimized
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    # Unwrap zero-dimensional ndarrays so a 0-d timedelta64 is recognized
    # by the isinstance check below.
    other = lib.item_from_zerodim(other)
    if isinstance(other, (timedelta, np.timedelta64, Tick)):
        other = Timedelta(other)

    res1 = self // other
    res2 = self - res1 * other
    return res1, res2

def __rdivmod__(self, other):
    """
    Compute ``divmod(other, self)``.

    Returns
    -------
    tuple
        ``(other // self, other - (other // self) * self)``, or
        ``NotImplemented`` for Series/DataFrame/Index operands.
    """
    # Note: This is a naive implementation, can likely be optimized
    # NOTE(review): a reviewer asked whether this can share code with
    # __divmod__.
    if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
        return NotImplemented

    # Unwrap zero-dimensional ndarrays so a 0-d timedelta64 is recognized
    # by the isinstance check below.
    other = lib.item_from_zerodim(other)
    if isinstance(other, (timedelta, np.timedelta64, Tick)):
        other = Timedelta(other)

    res1 = other // self
    res2 = other - res1 * self
    return res1, res2

# Note: TimedeltaIndex overrides this in call to cls._add_numeric_methods
def __neg__(self):
if self.freq is not None:
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4863,20 +4863,22 @@ def _add_numeric_methods_binary(cls):
cls.__radd__ = _make_arithmetic_op(ops.radd, cls)
cls.__sub__ = _make_arithmetic_op(operator.sub, cls)
cls.__rsub__ = _make_arithmetic_op(ops.rsub, cls)
cls.__mul__ = _make_arithmetic_op(operator.mul, cls)
cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls)
cls.__rpow__ = _make_arithmetic_op(ops.rpow, cls)
cls.__pow__ = _make_arithmetic_op(operator.pow, cls)
cls.__mod__ = _make_arithmetic_op(operator.mod, cls)
cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls)
cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls)
cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls)
cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls)
if not compat.PY3:
cls.__div__ = _make_arithmetic_op(operator.div, cls)
cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls)

cls.__divmod__ = _make_arithmetic_op(divmod, cls)
if not issubclass(cls, ABCTimedeltaIndex):
# TODO: rmod? rdivmod?
cls.__mod__ = _make_arithmetic_op(operator.mod, cls)
cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls)
cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls)
cls.__divmod__ = _make_arithmetic_op(divmod, cls)
cls.__mul__ = _make_arithmetic_op(operator.mul, cls)
cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls)

@classmethod
def _add_numeric_methods_unary(cls):
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,12 @@ def wrap_arithmetic_op(self, other, result):
if result is NotImplemented:
return NotImplemented

if isinstance(result, tuple):
# divmod, rdivmod
assert len(result) == 2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this bubble up in any way?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean the whole isinstance block or the len-2 assertion? The former is necessary, the latter is just protecting against me being a dummy.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Referring just to the len-2. While I appreciate dummy-protection, I would still want to know if that assert would surface for end-users, that's all.

return (wrap_arithmetic_op(self, other, result[0]),
wrap_arithmetic_op(self, other, result[1]))

if not isinstance(result, Index):
# Index.__new__ will choose appropriate subclass for dtype
result = Index(result)
Expand Down
29 changes: 25 additions & 4 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@
from pandas.tseries.frequencies import to_offset


def _make_wrapped_arith_op(opname):
    """
    Build a method named ``opname`` that calls the ``TimedeltaArray``
    method of the same name and passes its result through
    ``wrap_arithmetic_op``.
    """
    array_op = getattr(TimedeltaArray, opname)

    def method(self, other):
        # The array-level op receives the underlying ``_data`` of an Index
        # operand; the wrapper still sees the original ``other``.
        operand = other._data if isinstance(other, Index) else other
        return wrap_arithmetic_op(self, other, array_op(self, operand))

    method.__name__ = opname
    return method


class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin,
TimelikeOps, Int64Index):
"""
Expand Down Expand Up @@ -227,14 +243,19 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
# -------------------------------------------------------------------
# Wrapping TimedeltaArray

__mul__ = Index.__mul__
__rmul__ = Index.__rmul__
__truediv__ = Index.__truediv__
__floordiv__ = Index.__floordiv__
__rfloordiv__ = Index.__rfloordiv__
if compat.PY2:
__div__ = Index.__div__

__mul__ = _make_wrapped_arith_op("__mul__")
__rmul__ = _make_wrapped_arith_op("__rmul__")
__floordiv__ = _make_wrapped_arith_op("__floordiv__")
__rfloordiv__ = _make_wrapped_arith_op("__rfloordiv__")
__mod__ = _make_wrapped_arith_op("__mod__")
__rmod__ = _make_wrapped_arith_op("__rmod__")
__divmod__ = _make_wrapped_arith_op("__divmod__")
__rdivmod__ = _make_wrapped_arith_op("__rdivmod__")

days = wrap_field_accessor(TimedeltaArray.days)
seconds = wrap_field_accessor(TimedeltaArray.seconds)
microseconds = wrap_field_accessor(TimedeltaArray.microseconds)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1404,8 +1404,7 @@ def wrapper(left, right):
elif is_timedelta64_dtype(left):
result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex)
return construct_result(left, result,
index=left.index, name=res_name,
dtype=result.dtype)
index=left.index, name=res_name)

elif is_timedelta64_dtype(right):
# We should only get here with non-scalar or timedelta64('NaT')
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1188,7 +1188,7 @@ def check(get_ser, test_ser):
# with 'operate' (from core/ops.py) for the ops that are not
# defined
op = getattr(get_ser, op_str, None)
with pytest.raises(TypeError, match='operate|cannot'):
with pytest.raises(TypeError, match='operate|[cC]annot'):
op(test_ser)

# ## timedelta64 ###
Expand Down
Loading