From 1241994116e8057eee5cb10b73a0db13b2480a0f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Jan 2018 20:24:52 -0800 Subject: [PATCH 1/9] Implement NullFrequencyError to fix exception raised by Series[datetime64]+int --- pandas/core/indexes/datetimelike.py | 10 +++++-- pandas/core/ops.py | 22 ++++++++++++-- pandas/errors/__init__.py | 8 +++++ pandas/tests/series/test_operators.py | 43 +++++++++++++++++++++++---- pandas/tseries/offsets.py | 6 ++-- 5 files changed, 76 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 0ee2f8ebce011..7bb6708e03421 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -32,6 +32,7 @@ from pandas.core import common as com, algorithms from pandas.core.algorithms import checked_add_with_arr from pandas.core.common import AbstractMethodError +from pandas.errors import NullFrequencyError import pandas.io.formats.printing as printing from pandas._libs import lib, iNaT, NaT @@ -692,6 +693,9 @@ def __add__(self, other): return self._add_datelike(other) elif isinstance(other, Index): return self._add_datelike(other) + elif is_integer_dtype(other) and self.freq is None: + # GH#19123 + raise NullFrequencyError("Cannot shift with no freq") else: # pragma: no cover return NotImplemented @@ -731,7 +735,9 @@ def __sub__(self, other): raise TypeError("cannot subtract {typ1} and {typ2}" .format(typ1=type(self).__name__, typ2=type(other).__name__)) - + elif is_integer_dtype(other) and self.freq is None: + # GH#19123 + raise NullFrequencyError("Cannot shift with no freq") else: # pragma: no cover return NotImplemented @@ -831,7 +837,7 @@ def shift(self, n, freq=None): return self if self.freq is None: - raise ValueError("Cannot shift with no freq") + raise NullFrequencyError("Cannot shift with no freq") start = self[0] + n * self.freq end = self[-1] + n * self.freq diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 99f7e7309d463..60382974628f8 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -20,7 +20,7 @@ from pandas.compat import bind_method import pandas.core.missing as missing -from pandas.errors import PerformanceWarning +from pandas.errors import PerformanceWarning, NullFrequencyError from pandas.core.common import _values_from_object, _maybe_match_name from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.common import ( @@ -672,9 +672,8 @@ def wrapper(left, right, name=name, na_op=na_op): left, right = _align_method_SERIES(left, right) if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): - result = op(pd.DatetimeIndex(left), right) + result = _dispatch_to_index_op(op, left, right, pd.DatetimeIndex) res_name = _get_series_op_result_name(left, right) - result.name = res_name # needs to be overriden if None return construct_result(left, result, index=left.index, name=res_name, dtype=result.dtype) @@ -703,6 +702,23 @@ def wrapper(left, right, name=name, na_op=na_op): return wrapper +def _dispatch_to_index_op(op, left, right, index_class): + """ + Defer to DatetimeIndex implementations for type + checking and timezone handling. + """ + left_idx = index_class(left) + left_idx.freq = None # avoid accidentally allowing integer add/sub + try: + result = op(left_idx, right) + except NullFrequencyError: + # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError + # on add/sub of integers (or int-like). We re-raise as a TypeError. + raise TypeError('incompatible type for a datetime/timedelta ' + 'operation [{name}]'.format(name=op.__name__)) + return result + + def _get_series_op_result_name(left, right): # `left` is always a pd.Series if isinstance(right, (ABCSeries, pd.Index)): diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index b3d1ce31d66ae..b77bc76204236 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -65,3 +65,11 @@ class MergeError(ValueError): Error raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. """ + + +class NullFrequencyError(ValueError): + """ + Error raised when a null `freq` attribute is used in an operation + that needs a non-null frequency, particularly `DatetimeIndex.shift`, + `TimedeltaIndex.shift`, `PeriodIndex.shift`. + """ diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index dbfeb9715c59e..93c64ca05e5e2 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -680,6 +680,25 @@ def test_timedelta_series_ops(self): assert_series_equal(ts - s, expected2) assert_series_equal(ts + (-s), expected2) + def test_td64series_add_intlike(self): + # GH#19123 + tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT']) + ser = Series(tdi) + + other = Series([20, 30, 40], dtype='uint8') + + pytest.raises(TypeError, ser.__add__, 1) + pytest.raises(TypeError, ser.__sub__, 1) + + pytest.raises(TypeError, ser.__add__, other) + pytest.raises(TypeError, ser.__sub__, other) + + pytest.raises(TypeError, ser.__add__, other.values) + pytest.raises(TypeError, ser.__sub__, other.values) + + pytest.raises(TypeError, ser.__add__, pd.Index(other)) + pytest.raises(TypeError, ser.__sub__, pd.Index(other)) + def test_timedelta64_operations_with_integers(self): # GH 4521 # divide/multiply by integers @@ -739,12 +758,6 @@ def test_timedelta64_operations_with_integers(self): Series([Timedelta('29 days 12:00:00'), Timedelta( '29 days 12:00:00'), Timedelta('NaT')])) - for op in ['__add__', '__sub__']: - sop = getattr(s1, op, None) - if sop is not None: - pytest.raises(TypeError, sop, 1) - pytest.raises(TypeError, sop, s2.values) - def test_timedelta64_operations_with_DateOffset(self): # GH 10699 td = Series([timedelta(minutes=5, seconds=3)] * 3) @@ -1428,6 +1441,24 @@ def test_dt64series_arith_overflow(self): res = dt - ser tm.assert_series_equal(res, -expected) + def test_dt64series_add_intlike(self): + # GH#19123 + dti = pd.DatetimeIndex(['2016-01-02', '2016-02-03', 'NaT']) + ser = Series(dti) + + other = Series([20, 30, 40], dtype='uint8') + + pytest.raises(TypeError, ser.__add__, 1) + pytest.raises(TypeError, ser.__sub__, 1) + + pytest.raises(TypeError, ser.__add__, other) + pytest.raises(TypeError, ser.__sub__, other) + + pytest.raises(TypeError, ser.__add__, other.values) + pytest.raises(TypeError, ser.__sub__, other.values) + + pytest.raises(TypeError, ser.__add__, pd.Index(other)) + pytest.raises(TypeError, ser.__sub__, pd.Index(other)) class TestSeriesOperators(TestData): def test_op_method(self): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 7c5fe2f0314e4..c029b9e42d09a 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1226,7 +1226,8 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - i += (roll % 2) * Timedelta(days=self.day_of_month).value + nanos = (roll % 2) * Timedelta(days=self.day_of_month).value + i += nanos.astype('timedelta64[ns]') return i + Timedelta(days=-1) @@ -1271,7 +1272,8 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - return i + (roll % 2) * Timedelta(days=self.day_of_month - 1).value + nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value + return i + nanos.astype('timedelta64[ns]') # --------------------------------------------------------------------- From 1fe5732bf9b57bc641cee291753db597b7184bd4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 9 Jan 2018 08:34:46 -0800 Subject: [PATCH 2/9] deprivatize, docstring, comment --- pandas/core/ops.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 60382974628f8..4c594e2cb0b3f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -672,7 +672,7 @@ def wrapper(left, right, name=name, na_op=na_op): left, right = _align_method_SERIES(left, right) if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): - result = _dispatch_to_index_op(op, left, right, pd.DatetimeIndex) + result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) res_name = _get_series_op_result_name(left, right) return construct_result(left, result, index=left.index, name=res_name, @@ -702,13 +702,29 @@ def wrapper(left, right, name=name, na_op=na_op): return wrapper -def _dispatch_to_index_op(op, left, right, index_class): +def dispatch_to_index_op(op, left, right, index_class): """ - Defer to DatetimeIndex implementations for type - checking and timezone handling. + Wrap Series left in the given index_class to delegate the operation op + to the index implementation. DatetimeIndex and TimedeltaIndex perform + type checking, timezone handling, overflow checks, etc. + + Parameters + ---------- + op : binary operator (operator.add, operator.sub, ...) + left : Series + right : object + index_class : DatetimeIndex or TimedeltaIndex + + Returns + ------- + result : object, usually DatetimeIndex, TimedeltaIndex, or Series """ left_idx = index_class(left) - left_idx.freq = None # avoid accidentally allowing integer add/sub + + # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, + # left_idx may inherit a freq from a cached DatetimeIndex. + # See discussion in GH#19147. + left_idx.freq = None try: result = op(left_idx, right) except NullFrequencyError: From 4fd68c149ac0ef1dc52f9ece4f6bb439c2643add Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 12:40:07 -0800 Subject: [PATCH 3/9] Flake8 whitespace fixup --- pandas/tests/series/test_operators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 8c7f6355aab05..2a6e89fbfac42 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1460,6 +1460,7 @@ def test_dt64series_add_intlike(self): pytest.raises(TypeError, ser.__add__, pd.Index(other)) pytest.raises(TypeError, ser.__sub__, pd.Index(other)) + class TestSeriesOperators(TestData): def test_op_method(self): def check(series, other, check_reverse=False): From df545a0d65444c685201aa336a0e0c009785d507 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 17:57:31 -0800 Subject: [PATCH 4/9] docstring, tests for NullFrequencyError in shift --- pandas/tests/indexes/datetimes/test_ops.py | 7 ++++++ .../indexes/timedeltas/test_timedelta.py | 7 ++++++ pandas/tseries/offsets.py | 22 +++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index a7a6e3caab727..a2a84adbf46c1 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -7,6 +7,7 @@ from itertools import product import pandas as pd +from pandas.errors import NullFrequencyError import pandas._libs.tslib as tslib from pandas._libs.tslibs.offsets import shift_months import pandas.util.testing as tm @@ -593,6 +594,12 @@ def test_nat_new(self): exp = np.array([tslib.iNaT] * 5, dtype=np.int64) tm.assert_numpy_array_equal(result, exp) + def test_shift_no_freq(self): + # GH#19147 + dti = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-01'], freq=None) + with pytest.raises(NullFrequencyError): + dti.shift(2) + def test_shift(self): # GH 9903 for tz in self.tz: diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index e25384ebf7d62..5a4d6dabbde3e 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -4,6 +4,7 @@ from datetime import timedelta import pandas as pd +from pandas.errors import NullFrequencyError import pandas.util.testing as tm from pandas import (timedelta_range, date_range, Series, Timedelta, TimedeltaIndex, Index, DataFrame, @@ -50,6 +51,12 @@ def test_shift(self): '10 days 01:00:03'], freq='D') tm.assert_index_equal(result, expected) + def test_shift_no_freq(self): + # GH#19147 + tdi = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'], freq=None) + with pytest.raises(NullFrequencyError): + tdi.shift(2) + def test_pickle_compat_construction(self): pass diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 2ae48bbcecd50..b2b513d96c9ac 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1233,6 +1233,17 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): + """Add days portion of offset to DatetimeIndex i + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ nanos = (roll % 2) * Timedelta(days=self.day_of_month).value i += nanos.astype('timedelta64[ns]') return i + Timedelta(days=-1) @@ -1279,6 +1290,17 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): + """Add days portion of offset to DatetimeIndex i + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value return i + nanos.astype('timedelta64[ns]') From 1a03a68df967ea7c9979d8b5d1f48e76e87f3239 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Jan 2018 10:47:23 -0800 Subject: [PATCH 5/9] dt64_series +/- int test for tzaware --- pandas/tests/series/test_operators.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 2a6e89fbfac42..8a564160e306b 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -680,7 +680,7 @@ def test_timedelta_series_ops(self): assert_series_equal(ts - s, expected2) assert_series_equal(ts + (-s), expected2) - def test_td64series_add_intlike(self): + def test_td64_series_add_intlike(self): # GH#19123 tdi = pd.TimedeltaIndex(['59 days', '59 days', 'NaT']) ser = Series(tdi) @@ -1441,9 +1441,10 @@ def test_dt64series_arith_overflow(self): res = dt - ser tm.assert_series_equal(res, -expected) - def test_dt64series_add_intlike(self): + @pytest.mark.parametrize('tz', [None, 'Asia/Tokyo']) + def test_dt64_series_add_intlike(self, tz): # GH#19123 - dti = pd.DatetimeIndex(['2016-01-02', '2016-02-03', 'NaT']) + dti = pd.DatetimeIndex(['2016-01-02', '2016-02-03', 'NaT'], tz=tz) ser = Series(dti) other = Series([20, 30, 40], dtype='uint8') From fd0ac995b25937552a53db58c624af02ea2ee658 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 12 Jan 2018 10:14:46 -0800 Subject: [PATCH 6/9] fix merge screwup --- pandas/tests/series/test_operators.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index a488134ad5416..c0c0f09e01667 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -847,13 +847,6 @@ def test_td64series_div_numeric_scalar(self, two, tdser): result = tdser / two assert_series_equal(result, expected) - # invalid ops - assert_series_equal(s1 / s2.astype(float), - Series([Timedelta('2 days 22:48:00'), Timedelta( - '1 days 23:12:00'), Timedelta('NaT')])) - assert_series_equal(s1 / 2.0, - Series([Timedelta('29 days 12:00:00'), Timedelta( - '29 days 12:00:00'), Timedelta('NaT')])) class TestTimedeltaSeriesArithmetic(object): def test_td64series_add_sub_timestamp(self): From aab851dce3c8ed7735ebc07d22b93d1ee23baa83 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 14 Jan 2018 18:36:05 -0800 Subject: [PATCH 7/9] shallow_copy instead of changing inplace --- pandas/core/ops.py | 3 ++- pandas/tests/series/test_timeseries.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4c594e2cb0b3f..fc3ea106252db 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -724,7 +724,8 @@ def dispatch_to_index_op(op, left, right, index_class): # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, # left_idx may inherit a freq from a cached DatetimeIndex. # See discussion in GH#19147. - left_idx.freq = None + if left_idx.freq is not None: + left_idx = left_idx._shallow_copy(freq=None) try: result = op(left_idx, right) except NullFrequencyError: diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 6e711abf4491b..b9c95c372ab9e 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -11,6 +11,8 @@ import pandas.util._test_decorators as td from pandas._libs.tslib import iNaT from pandas.compat import lrange, StringIO, product +from pandas.errors import NullFrequencyError + from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.datetimes import DatetimeIndex from pandas.tseries.offsets import BDay, BMonthEnd @@ -123,7 +125,7 @@ def test_shift2(self): tm.assert_index_equal(result.index, exp_index) idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) - pytest.raises(ValueError, idx.shift, 1) + pytest.raises(NullFrequencyError, idx.shift, 1) def test_shift_dst(self): # GH 13926 From 15b5f0827d08c5e0b8830c5ada8d99d3589320f4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 15 Jan 2018 10:12:56 -0800 Subject: [PATCH 8/9] whatsnew api note --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 14949267fc37d..b622a94d8469b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -273,6 +273,7 @@ Other API Changes - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``) when the index object frequency is ``None`` (:issue:`19147`) .. _whatsnew_0230.deprecations: From 63ae03967c20054e4fa9f13b3f7fd055f0471878 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 15 Jan 2018 16:37:52 -0800 Subject: [PATCH 9/9] suggested edit to whatsnew note --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index b622a94d8469b..cca147759edbe 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -273,7 +273,7 @@ Other API Changes - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) -- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``) when the index object frequency is ``None`` (:issue:`19147`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) .. _whatsnew_0230.deprecations: