Skip to content

API: PeriodIndex subtraction to return object Index of DateOffsets #20049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 29, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,7 @@ Other API Changes
- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`)
- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`)
- :func:`Series.str.replace` now takes an optional `regex` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`)
- :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return a numeric ``Index`` instead of raising a ``TypeErrorr`` (:issue:`20049`)
Copy link
Member

@jschendel jschendel Mar 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extra "r" at the end of "TypeErrorr"


.. _whatsnew_0230.deprecations:

Expand Down
45 changes: 43 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
import numpy as np

from pandas._libs import lib, iNaT, NaT
from pandas._libs.tslibs.period import Period
from pandas._libs.tslibs.period import (Period, IncompatibleFrequency,
_DIFFERENT_FREQ_INDEX)
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas._libs.tslibs.timestamps import round_ns

Expand Down Expand Up @@ -667,6 +668,43 @@ def _sub_nat(self):
def _sub_period(self, other):
return NotImplemented

def _sub_period_array(self, other):
"""
Subtract one PeriodIndex from another. This is only valid if they
have the same frequency.

Parameters
----------
other : PeriodIndex

Returns
-------
result : np.ndarray
dtype is np.int64 if there are no NaTs in self or other,
otherwise np.float64
"""
if not is_period_dtype(self):
raise TypeError("cannot subtract {dtype}-dtype to {cls}"
.format(dtype=other.dtype,
cls=type(self).__name__))

if not len(self) == len(other):
raise ValueError("cannot add indices of unequal length")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might have missed it, but is there a test that hits this? And should it say "subtract" instead of "add"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catches on both counts. Will fix.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just pushed with fix to add-->subtract. test_pi_sub_pi, test_pi_sub_pi_with_nat, test_pi_sub_isub_pi each go through this method, though I don't think any go through this particular line

if self.freq != other.freq:
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
raise IncompatibleFrequency(msg)

new_values = checked_add_with_arr(self.asi8, -other.asi8,
arr_mask=self._isnan,
b_mask=other._isnan)
new_values = new_values
if self.hasnans or other.hasnans:
# we have to convert to float to represent NaNs
mask = (self._isnan) | (other._isnan)
new_values = new_values.astype(np.float64)
new_values[mask] = np.nan
return new_values

def _add_offset(self, offset):
raise com.AbstractMethodError(self)

Expand Down Expand Up @@ -740,7 +778,7 @@ def __add__(self, other):
elif is_integer_dtype(other) and self.freq is None:
# GH#19123
raise NullFrequencyError("Cannot shift with no freq")
elif is_float_dtype(other):
elif is_float_dtype(other) or is_period_dtype(other):
# Explicitly catch invalid dtypes
raise TypeError("cannot add {dtype}-dtype to {cls}"
.format(dtype=other.dtype,
Expand Down Expand Up @@ -799,6 +837,9 @@ def __sub__(self, other):
elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
# DatetimeIndex, ndarray[datetime64]
result = self._sub_datelike(other)
elif is_period_dtype(other):
# PeriodIndex
result = self._sub_period_array(other)
elif isinstance(other, Index):
raise TypeError("cannot subtract {cls} and {typ}"
.format(cls=type(self).__name__,
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/datetimes/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,17 @@ def test_sub_period(self, freq):
with pytest.raises(TypeError):
p - idx

@pytest.mark.parametrize('op', [operator.add, ops.radd,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add (maybe another test) add/sub with a pi and another dtype (I don't think we have this test?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking, you mean to add cases over in the period test_arithmetic file, not here, right?

operator.sub, ops.rsub])
@pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H'])
@pytest.mark.parametrize('dti_freq', [None, 'D'])
def test_dti_sub_pi(self, dti_freq, pi_freq, op):
# GH#20049subtracting PeriodIndex should raise TypeError
dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=dti_freq)
pi = dti.to_period(pi_freq)
with pytest.raises(TypeError):
op(dti, pi)

def test_ufunc_coercions(self):
idx = date_range('2011-01-01', periods=3, freq='2D', name='x')

Expand Down
63 changes: 45 additions & 18 deletions pandas/tests/indexes/period/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,42 @@ def test_comp_nat(self, dtype):


class TestPeriodIndexArithmetic(object):
# ---------------------------------------------------------------
# __add__/__sub__ with PeriodIndex

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you give a 1-2 line on what are allowed ops on PI

def test_pi_add_iadd_pi_raises(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

# previously performed setop union, now raises TypeError (GH14164)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a pretty old comment, can you improve

with pytest.raises(TypeError):
rng + other

with pytest.raises(TypeError):
rng += other

def test_pi_sub_pi(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

result = rng - other
expected = pd.Int64Index([-5, -5, -5, -5, -5])
tm.assert_index_equal(result, expected)

def test_pi_sub_pi_with_nat(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = rng[1:].insert(0, pd.NaT)
assert other[1:].equals(rng[1:])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this assert necessary? I don't immediately see why it's needed, but could be missing something. If it is needed, can assert_index_equal be used instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is checking that I haven't already screwed up, i.e. that the test below is indeed testing what I think it is testing.


result = rng - other
expected = pd.Float64Index([np.nan, 0, 0, 0, 0])
tm.assert_index_equal(result, expected)

def test_pi_sub_pi_mismatched_freq(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='H', periods=5)
with pytest.raises(period.IncompatibleFrequency):
rng - other

# -------------------------------------------------------------
# Invalid Operations
Expand Down Expand Up @@ -379,17 +415,6 @@ def test_pi_sub_offset_array(self, box):
with tm.assert_produces_warning(PerformanceWarning):
anchored - pi

def test_pi_add_iadd_pi_raises(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

# previously performed setop union, now raises TypeError (GH14164)
with pytest.raises(TypeError):
rng + other

with pytest.raises(TypeError):
rng += other

def test_pi_add_iadd_int(self, one):
# Variants of `one` for #19012
rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
Expand Down Expand Up @@ -419,17 +444,19 @@ def test_pi_sub_intlike(self, five):
exp = rng + (-five)
tm.assert_index_equal(result, exp)

def test_pi_sub_isub_pi_raises(self):
# previously performed setop, now raises TypeError (GH14164)
# TODO needs to wait on #13077 for decision on result type
def test_pi_sub_isub_pi(self):
# GH#20049
# previously raised TypeError (GH#14164), before that
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you refresh comment

# performed set operation. See discussion in GH#13077
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

with pytest.raises(TypeError):
rng - other
expected = pd.Int64Index([-5, -5, -5, -5, -5])
result = rng - other
tm.assert_index_equal(result, expected)

with pytest.raises(TypeError):
rng -= other
rng -= other
tm.assert_index_equal(rng, expected)

def test_pi_sub_isub_offset(self):
# offset
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/timedeltas/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,18 @@ def test_tdi_sub_period(self, freq):
with pytest.raises(TypeError):
p - idx

@pytest.mark.parametrize('op', [operator.add, ops.radd,
operator.sub, ops.rsub])
@pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H'])
@pytest.mark.parametrize('tdi_freq', [None, 'H'])
def test_dti_sub_pi(self, tdi_freq, pi_freq, op):
# GH#20049 subtracting PeriodIndex should raise TypeError
tdi = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=tdi_freq)
dti = pd.Timestamp('2018-03-07 17:16:40') + tdi
pi = dti.to_period(pi_freq)
with pytest.raises(TypeError):
op(dti, pi)

# -------------------------------------------------------------
# TimedeltaIndex.shift is used by __add__/__sub__

Expand Down