Skip to content

implement truediv, rtruediv directly in TimedeltaArray; tests #23829

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Nov 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
6ec1f08
implement truediv, rtruediv directly in TimedeltaArray; tests
jbrockmendel Nov 21, 2018
4c2cc59
test tdi/tdi specifically
jbrockmendel Nov 21, 2018
bd2ee96
more checks and test cases
jbrockmendel Nov 21, 2018
3275dd9
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 21, 2018
79901f5
dont define _override_div_mod_methods, matches for pytest.raises
jbrockmendel Nov 21, 2018
adea273
change comment
jbrockmendel Nov 21, 2018
da9f743
whatsnew, GH references
jbrockmendel Nov 21, 2018
ba9e490
error msg py3 compat
jbrockmendel Nov 21, 2018
10bb49b
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 21, 2018
8f276ae
flake8 fixup, raise directly
jbrockmendel Nov 21, 2018
7d56da9
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 24, 2018
6097789
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 26, 2018
2037be8
sidestep object conversion
jbrockmendel Nov 26, 2018
ffedf35
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 27, 2018
2fc44aa
dont case result when operating against object dtype
jbrockmendel Nov 27, 2018
cd4ff57
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 28, 2018
641ad20
another GH reference
jbrockmendel Nov 28, 2018
e0d696f
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 28, 2018
7d9e677
comment
jbrockmendel Nov 28, 2018
dfc7af4
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 28, 2018
55cad6b
Fixup rebase mixup, un-skip part of a test that isnt broken after all
jbrockmendel Nov 29, 2018
d21ae78
Merge branch 'master' of https://github.com/pandas-dev/pandas into gt…
jbrockmendel Nov 29, 2018
d72bf90
flake8 fixup
jbrockmendel Nov 29, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,7 @@ Timedelta
- Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`)
- Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-`NaT` :class:`DatetimeIndex` instead of an all-`NaT` :class:`TimedeltaIndex` (:issue:`23215`)
- Bug in :class:`Timedelta` and :func:`to_timedelta()` have inconsistencies in supported unit string (:issue:`21762`)

- Bug in :class:`TimedeltaIndex` division where dividing by another :class:`TimedeltaIndex` raised ``TypeError`` instead of returning a :class:`Float64Index` (:issue:`23829`, :issue:`22631`)

Timezones
^^^^^^^^^
Expand Down
102 changes: 99 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas._libs import algos, tslibs
from pandas._libs import algos, lib, tslibs
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import (
Expand Down Expand Up @@ -177,7 +177,7 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False):
passed=freq.freqstr))
elif freq is None:
freq = inferred_freq
freq_infer = False
freq_infer = False

result = cls._simple_new(values, freq=freq)
# check that we are matching freqs
Expand Down Expand Up @@ -355,12 +355,108 @@ def _evaluate_with_timedelta_like(self, other, op):

__mul__ = _wrap_tdi_op(operator.mul)
__rmul__ = __mul__
__truediv__ = _wrap_tdi_op(operator.truediv)
__floordiv__ = _wrap_tdi_op(operator.floordiv)
__rfloordiv__ = _wrap_tdi_op(ops.rfloordiv)

def __truediv__(self, other):
# timedelta / X is well-defined for timedelta-like or numeric X
other = lib.item_from_zerodim(other)

if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# specifically timedelta64-NaT
result = np.empty(self.shape, dtype=np.float64)
result.fill(np.nan)
return result

# otherwise, dispatch to Timedelta implementation
return self._data / other

elif lib.is_scalar(other):
# assume it is numeric
result = self._data / other
freq = None
if self.freq is not None:
# Tick division is not implemented, so operate on Timedelta
freq = self.freq.delta / other
return type(self)(result, freq=freq)

if not hasattr(other, "dtype"):
# e.g. list, tuple
other = np.array(other)

if len(other) != len(self):
raise ValueError("Cannot divide vectors with unequal lengths")

elif is_timedelta64_dtype(other):
# let numpy handle it
return self._data / other
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to ensure other is a np.ndarray and not TimedeltaArray here? (meaning, extract the numpy array out of the TimedeltaArray)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. self._data.__div__(other) would return NotImplemented if other were a TimedeltaArray. This PR includes a test that covers TDA/TDA.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but that is another level of redirection, while we know this will happen and can directly do the correct thing here?
(I suppose is_timedelta64_dtype only passes through those two cases of ndarray or TimedeltaArray?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose is_timedelta64_dtype only passes through those two cases of ndarray or TimedeltaArray?

Yes, since we exclude Series and Index at the start.

Yes, but that is another level of redirection, while we know this will happen and can directly do the correct thing here?

I guess we could replace other with getattr(other, "_data", other) (or if/when TimedeltaArray gets an __array__ method, just np.array(other), which would be prettier)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the (hopefully not too distant) future, TimedeltaArray will no longer be an Index. In this case we would want to explicitly grab the ._data out of it and proceed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And what's the return type here? Does this need to be wrapped in a type(self) so that we return a TimedeltaArray?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And what's the return type here?

float-dtyped ndarray


elif is_object_dtype(other):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to allow this?
I would be fine with raising a TypeError here.

(I first wanted to say: can't we dispatch that to numpy, thinking that numpy object dtype would handle that, but they raise a TypeError)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see why this would be first on the chopping block if we had to support fewer cases. Is there a compelling reason not to handle this case?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also turn around the question :) Is there a compelling reason to handle this case?

It's just an extra case to support. And eg, we could discuss whether this should return object dtype data or timedelta, as you are inferring now? Looking at Series behaviour with int64 and object integers, it actually returns object. For datetimes it now raises. So at least, our support is at the moment not very consistent.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a compelling reason to handle this case?

Because a selling point of pandas is that things Just Work? Because the code and tests are already written, so the marginal cost is \approx zero?

our support is at the moment not very consistent

Fair enough. If a goal is to make things more consistent (which I'm +1 on BTW) then we're probably not going to go around and start breaking the places where it currently is supported.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i agree with @jbrockmendel here

# Note: we do not do type inference on the result, so either
# an object array or numeric-dtyped (if numpy does inference)
# will be returned. GH#23829
result = [self[n] / other[n] for n in range(len(self))]
result = np.array(result)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually this is really close to what soft_convert_objects does.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That isn't clear to me. soft_convert_objects doesn't call lib.infer_dtype or any analogue.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are essentially re-implementing it. i would rather not do that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not clear on what you have in mind. Something like:

if lib.infer_dtype(result) == 'timedelta':
    result = soft_convert_objects(result, timedelta=True, coerce=False)
    return type(self)(result)
return result

?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough, will change

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed. this ends up changing the behavior of the DataFrame test case, but that's largely driven by the fact that DataFrame([NaT]) gets inferred as datetime64[ns]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is changed here? shouldn't is_object_dtype result in a TypeError or a NotImplemented?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two independent questions that have been asked about the object-dtype case:

  1. should we just raise TypeError instead or should we handle it so it Just Works (the latter being what this PR does)
  2. Given that we handle this case, do we try to infer the output dtype or just return object dtype? This PR originally did the former, then changed to do the latter following discussion.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok this is fine, you are returning object dtype (which is consistent with how we do for Series now)

return result

else:
result = self._data / other
return type(self)(result)

def __rtruediv__(self, other):
# X / timedelta is defined only for timedelta-like X
other = lib.item_from_zerodim(other)

if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
return NotImplemented

if isinstance(other, (timedelta, np.timedelta64, Tick)):
other = Timedelta(other)
if other is NaT:
# specifically timedelta64-NaT
result = np.empty(self.shape, dtype=np.float64)
result.fill(np.nan)
return result

# otherwise, dispatch to Timedelta implementation
return other / self._data

elif lib.is_scalar(other):
raise TypeError("Cannot divide {typ} by {cls}"
.format(typ=type(other).__name__,
cls=type(self).__name__))

if not hasattr(other, "dtype"):
# e.g. list, tuple
other = np.array(other)

if len(other) != len(self):
raise ValueError("Cannot divide vectors with unequal lengths")

elif is_timedelta64_dtype(other):
# let numpy handle it
return other / self._data

elif is_object_dtype(other):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or can raise NotImplemented here? does that work?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that might be fragile; might depend on having __array__ implemented. Either way, better to make it explicit than rely on numpy implementation

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as above

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, can you add a comment here (and above), where we do the operation but do not infer the output type (just for posterity), otherwise this PR lgtm. ping on green.

# Note: unlike in __truediv__, we do not _need_ to do type
# inference on the result. It does not raise, a numeric array
# is returned. GH#23829
result = [other[n] / self[n] for n in range(len(self))]
return np.array(result)

else:
raise TypeError("Cannot divide {dtype} data by {cls}"
.format(dtype=other.dtype,
cls=type(self).__name__))

if compat.PY2:
__div__ = __truediv__
__rdiv__ = __rtruediv__

# Note: TimedeltaIndex overrides this in call to cls._add_numeric_methods
def __neg__(self):
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5021,11 +5021,14 @@ def _add_numeric_methods_binary(cls):
cls.__mod__ = _make_arithmetic_op(operator.mod, cls)
cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls)
cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls)
cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls)
cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls)
if not compat.PY3:
cls.__div__ = _make_arithmetic_op(operator.div, cls)
cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls)

if not issubclass(cls, ABCTimedeltaIndex):
# GH#23829 TimedeltaIndex defines these directly
cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls)
cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls)
if not compat.PY3:
cls.__div__ = _make_arithmetic_op(operator.div, cls)
cls.__rdiv__ = _make_arithmetic_op(ops.rdiv, cls)

cls.__divmod__ = _make_arithmetic_op(divmod, cls)

Expand Down
23 changes: 20 additions & 3 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,8 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):

__mul__ = Index.__mul__
__rmul__ = Index.__rmul__
__truediv__ = Index.__truediv__
__floordiv__ = Index.__floordiv__
__rfloordiv__ = Index.__rfloordiv__
if compat.PY2:
__div__ = Index.__div__

days = wrap_field_accessor(TimedeltaArray.days)
seconds = wrap_field_accessor(TimedeltaArray.seconds)
Expand All @@ -261,6 +258,26 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):

total_seconds = wrap_array_method(TimedeltaArray.total_seconds, True)

def __truediv__(self, other):
    """
    Compute ``self / other`` for this index.

    An ``Index`` operand is swapped for its ``_values`` before the call is
    delegated to ``TimedeltaArray.__truediv__``, because the array-level
    implementation defers when handed an Index. The raw result is then
    passed to ``wrap_arithmetic_op`` together with the *original* operand.
    """
    # Only unwrap Index operands; everything else is forwarded untouched.
    divisor = other._values if isinstance(other, Index) else other
    quotient = TimedeltaArray.__truediv__(self, divisor)
    return wrap_arithmetic_op(self, other, quotient)

def __rtruediv__(self, other):
    """
    Compute the reflected division ``other / self`` for this index.

    An ``Index`` operand is swapped for its ``_values`` before the call is
    delegated to ``TimedeltaArray.__rtruediv__``, because the array-level
    implementation defers when handed an Index. The raw result is then
    passed to ``wrap_arithmetic_op`` together with the *original* operand.
    """
    # Only unwrap Index operands; everything else is forwarded untouched.
    numerator = other._values if isinstance(other, Index) else other
    quotient = TimedeltaArray.__rtruediv__(self, numerator)
    return wrap_arithmetic_op(self, other, quotient)

if compat.PY2:
__div__ = __truediv__
__rdiv__ = __rtruediv__

# Compat for frequency inference, see GH#23789
_is_monotonic_increasing = Index.is_monotonic_increasing
_is_monotonic_decreasing = Index.is_monotonic_decreasing
Expand Down
Loading