Skip to content

API: PeriodIndex subtraction to return object Index of DateOffsets #20049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Datetimelike API Changes

- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`)
- :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`)
- :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`)

.. _whatsnew_0240.api.other:

Expand Down
43 changes: 41 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
import numpy as np

from pandas._libs import lib, iNaT, NaT, Timedelta
from pandas._libs.tslibs.period import Period
from pandas._libs.tslibs.period import (Period, IncompatibleFrequency,
_DIFFERENT_FREQ_INDEX)
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas._libs.tslibs.timestamps import round_ns

Expand Down Expand Up @@ -784,6 +785,41 @@ def _sub_nat(self):
def _sub_period(self, other):
return NotImplemented

def _sub_period_array(self, other):
"""
Subtract one PeriodIndex from another. This is only valid if they
have the same frequency.

Parameters
----------
other : PeriodIndex

Returns
-------
result : np.ndarray[object]
Array of DateOffset objects; nulls represented by NaT
"""
if not is_period_dtype(self):
raise TypeError("cannot subtract {dtype}-dtype to {cls}"
.format(dtype=other.dtype,
cls=type(self).__name__))

if not len(self) == len(other):
raise ValueError("cannot subtract indices of unequal length")
if self.freq != other.freq:
msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr)
raise IncompatibleFrequency(msg)

new_values = checked_add_with_arr(self.asi8, -other.asi8,
arr_mask=self._isnan,
b_mask=other._isnan)

new_values = np.array([self.freq * x for x in new_values])
if self.hasnans or other.hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = NaT
return new_values

def _add_offset(self, offset):
raise com.AbstractMethodError(self)

Expand Down Expand Up @@ -894,7 +930,7 @@ def __add__(self, other):
return self._add_datelike(other)
elif is_integer_dtype(other):
result = self._addsub_int_array(other, operator.add)
elif is_float_dtype(other):
elif is_float_dtype(other) or is_period_dtype(other):
# Explicitly catch invalid dtypes
raise TypeError("cannot add {dtype}-dtype to {cls}"
.format(dtype=other.dtype,
Expand Down Expand Up @@ -953,6 +989,9 @@ def __sub__(self, other):
elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
# DatetimeIndex, ndarray[datetime64]
result = self._sub_datelike(other)
elif is_period_dtype(other):
# PeriodIndex
result = self._sub_period_array(other)
elif is_integer_dtype(other):
result = self._addsub_int_array(other, operator.sub)
elif isinstance(other, Index):
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/indexes/datetimes/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,17 @@ def test_sub_period(self, freq):
with pytest.raises(TypeError):
p - idx

@pytest.mark.parametrize('op', [operator.add, ops.radd,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add (maybe another test) add/sub with a pi and another dtype (I don't think we have this test?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking, you mean to add cases over in the period test_arithmetic file, not here, right?

operator.sub, ops.rsub])
@pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H'])
@pytest.mark.parametrize('dti_freq', [None, 'D'])
def test_dti_sub_pi(self, dti_freq, pi_freq, op):
# GH#20049 subtracting PeriodIndex should raise TypeError
dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=dti_freq)
pi = dti.to_period(pi_freq)
with pytest.raises(TypeError):
op(dti, pi)

def test_ufunc_coercions(self):
idx = date_range('2011-01-01', periods=3, freq='2D', name='x')

Expand Down
74 changes: 51 additions & 23 deletions pandas/tests/indexes/period/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,57 @@ def test_comp_nat(self, dtype):


class TestPeriodIndexArithmetic(object):
# ---------------------------------------------------------------
# __add__/__sub__ with PeriodIndex
# PeriodIndex + other is defined for integers and timedelta-like others
# PeriodIndex - other is defined for integers, timedelta-like others,
# and PeriodIndex (with matching freq)

def test_pi_add_iadd_pi_raises(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

# An earlier implementation of PeriodIndex addition performed
# a set operation (union). This has since been changed to
# raise a TypeError. See GH#14164 and GH#13077 for historical
# reference.
with pytest.raises(TypeError):
rng + other

with pytest.raises(TypeError):
rng += other

def test_pi_sub_isub_pi(self):
# GH#20049
# For historical reference see GH#14164, GH#13077.
# PeriodIndex subtraction originally performed set difference,
# then changed to raise TypeError before being implemented in GH#20049
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

off = rng.freq
expected = pd.Index([-5 * off] * 5)
result = rng - other
tm.assert_index_equal(result, expected)

rng -= other
tm.assert_index_equal(rng, expected)

def test_pi_sub_pi_with_nat(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = rng[1:].insert(0, pd.NaT)
assert other[1:].equals(rng[1:])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this assert necessary? I don't immediately see why it's needed, but could be missing something. If it is needed, can assert_index_equal be used instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is checking that I haven't already screwed up, i.e. that the test below is indeed testing what I think it is testing.


result = rng - other
off = rng.freq
expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off])
tm.assert_index_equal(result, expected)

def test_pi_sub_pi_mismatched_freq(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='H', periods=5)
with pytest.raises(period.IncompatibleFrequency):
rng - other

# -------------------------------------------------------------
# Invalid Operations
Expand Down Expand Up @@ -379,17 +430,6 @@ def test_pi_sub_offset_array(self, box):
with tm.assert_produces_warning(PerformanceWarning):
anchored - pi

def test_pi_add_iadd_pi_raises(self):
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

# previously performed setop union, now raises TypeError (GH14164)
with pytest.raises(TypeError):
rng + other

with pytest.raises(TypeError):
rng += other

def test_pi_add_iadd_int(self, one):
# Variants of `one` for #19012
rng = pd.period_range('2000-01-01 09:00', freq='H', periods=10)
Expand Down Expand Up @@ -419,18 +459,6 @@ def test_pi_sub_intlike(self, five):
exp = rng + (-five)
tm.assert_index_equal(result, exp)

def test_pi_sub_isub_pi_raises(self):
# previously performed setop, now raises TypeError (GH14164)
# TODO needs to wait on #13077 for decision on result type
rng = pd.period_range('1/1/2000', freq='D', periods=5)
other = pd.period_range('1/6/2000', freq='D', periods=5)

with pytest.raises(TypeError):
rng - other

with pytest.raises(TypeError):
rng -= other

def test_pi_sub_isub_offset(self):
# offset
# DateOffset
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/timedeltas/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,18 @@ def test_tdi_sub_period(self, freq):
with pytest.raises(TypeError):
p - idx

@pytest.mark.parametrize('op', [operator.add, ops.radd,
operator.sub, ops.rsub])
@pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H'])
@pytest.mark.parametrize('tdi_freq', [None, 'H'])
def test_dti_sub_pi(self, tdi_freq, pi_freq, op):
# GH#20049 subtracting PeriodIndex should raise TypeError
tdi = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=tdi_freq)
dti = pd.Timestamp('2018-03-07 17:16:40') + tdi
pi = dti.to_period(pi_freq)
with pytest.raises(TypeError):
op(dti, pi)

# -------------------------------------------------------------
# TimedeltaIndex.shift is used by __add__/__sub__

Expand Down