From 986fdbc3a8872f80873fd119f672fddf4b4206b6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 19 Nov 2018 06:55:49 -0800 Subject: [PATCH 01/16] unrelated, change _window->libwindow --- pandas/core/window.py | 55 ++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index f7a60527602a1..1e512d9dec5ba 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -33,7 +33,7 @@ from pandas.core.base import PandasObject, SelectionMixin from pandas.core.groupby.base import GroupByMixin import pandas.core.common as com -import pandas._libs.window as _window +import pandas._libs.window as libwindow from pandas import compat from pandas.compat.numpy import function as nv @@ -688,10 +688,10 @@ def _apply_window(self, mean=True, **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, len(window)) - return _window.roll_window(np.concatenate((arg, - additional_nans)) - if center else arg, window, minp, - avg=mean) + return libwindow.roll_window(np.concatenate((arg, + additional_nans)) + if center else arg, window, minp, + avg=mean) result = np.apply_along_axis(f, self.axis, values) @@ -851,7 +851,7 @@ def _apply(self, func, name=None, window=None, center=None, cfunc = getattr(_window, func, None) if cfunc is None: raise ValueError("we do not support this function " - "in _window.{0}".format(func)) + "in libwindow.{func}".format(func=func)) def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, window) @@ -995,7 +995,7 @@ def f(arg, window, min_periods, closed): minp = _use_window(min_periods, window) if not raw: arg = Series(arg, index=self.obj.index) - return _window.roll_generic( + return libwindow.roll_generic( arg, window, minp, indexi, closed, offset, func, raw, args, kwargs) @@ -1160,8 +1160,8 @@ def std(self, ddof=1, *args, **kwargs): def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) - return _zsqrt(_window.roll_var(arg, window, minp, indexi, - self.closed, ddof)) + return _zsqrt(libwindow.roll_var(arg, window, minp, indexi, + self.closed, ddof)) return self._apply(f, 'std', check_minp=_require_min_periods(1), ddof=ddof, **kwargs) @@ -1331,15 +1331,15 @@ def quantile(self, quantile, interpolation='linear', **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) if quantile == 1.0: - return _window.roll_max(arg, window, minp, indexi, - self.closed) + return libwindow.roll_max(arg, window, minp, indexi, + self.closed) elif quantile == 0.0: - return _window.roll_min(arg, window, minp, indexi, - self.closed) + return libwindow.roll_min(arg, window, minp, indexi, + self.closed) else: - return _window.roll_quantile(arg, window, minp, indexi, - self.closed, quantile, - interpolation) + return libwindow.roll_quantile(arg, window, minp, indexi, + self.closed, quantile, + interpolation) return self._apply(f, 'quantile', quantile=quantile, **kwargs) @@ -2265,7 +2265,7 @@ def _apply(self, func, **kwargs): cfunc = getattr(_window, func, None) if cfunc is None: raise ValueError("we do not support this function " - "in _window.{0}".format(func)) + "in libwindow.{func}".format(func=func)) def func(arg): return cfunc(arg, self.com, int(self.adjust), @@ -2300,9 +2300,9 @@ def var(self, bias=False, *args, **kwargs): nv.validate_window_func('var', args, kwargs) def f(arg): - return _window.ewmcov(arg, arg, self.com, int(self.adjust), - int(self.ignore_na), int(self.min_periods), - int(bias)) + return libwindow.ewmcov(arg, arg, self.com, int(self.adjust), + int(self.ignore_na), int(self.min_periods), + int(bias)) return self._apply(f, **kwargs) @@ -2320,9 +2320,10 @@ def cov(self, other=None, pairwise=None, bias=False, **kwargs): def _get_cov(X, Y): X = self._shallow_copy(X) Y = self._shallow_copy(Y) - cov = _window.ewmcov(X._prep_values(), Y._prep_values(), self.com, - int(self.adjust), int(self.ignore_na), - int(self.min_periods), int(bias)) + cov = libwindow.ewmcov(X._prep_values(), Y._prep_values(), + self.com, int(self.adjust), + int(self.ignore_na), int(self.min_periods), + int(bias)) return X._wrap_result(cov) return _flex_binary_moment(self._selected_obj, other._selected_obj, @@ -2344,10 +2345,10 @@ def _get_corr(X, Y): Y = self._shallow_copy(Y) def _cov(x, y): - return _window.ewmcov(x, y, self.com, int(self.adjust), - int(self.ignore_na), - int(self.min_periods), - 1) + return libwindow.ewmcov(x, y, self.com, int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + 1) x_values = X._prep_values() y_values = Y._prep_values() From fd759315d17b5883faed6cefc1d34a3a8b23af12 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 19 Nov 2018 09:50:21 -0800 Subject: [PATCH 02/16] revert non-central --- pandas/core/window.py | 55 +++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 1e512d9dec5ba..f7a60527602a1 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -33,7 +33,7 @@ from pandas.core.base import PandasObject, SelectionMixin from pandas.core.groupby.base import GroupByMixin import pandas.core.common as com -import pandas._libs.window as libwindow +import pandas._libs.window as _window from pandas import compat from pandas.compat.numpy import function as nv @@ -688,10 +688,10 @@ def _apply_window(self, mean=True, **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, len(window)) - return libwindow.roll_window(np.concatenate((arg, - additional_nans)) - if center else arg, window, minp, - avg=mean) + return _window.roll_window(np.concatenate((arg, + additional_nans)) + if center else arg, window, minp, + avg=mean) result = np.apply_along_axis(f, self.axis, values) @@ -851,7 +851,7 @@ def _apply(self, func, name=None, window=None, center=None, cfunc = getattr(_window, func, None) if cfunc is None: raise ValueError("we do not support this function " - "in libwindow.{func}".format(func=func)) + "in _window.{0}".format(func)) def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, window) @@ -995,7 +995,7 @@ def f(arg, window, min_periods, closed): minp = _use_window(min_periods, window) if not raw: arg = Series(arg, index=self.obj.index) - return libwindow.roll_generic( + return _window.roll_generic( arg, window, minp, indexi, closed, offset, func, raw, args, kwargs) @@ -1160,8 +1160,8 @@ def std(self, ddof=1, *args, **kwargs): def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) - return _zsqrt(libwindow.roll_var(arg, window, minp, indexi, - self.closed, ddof)) + return _zsqrt(_window.roll_var(arg, window, minp, indexi, + self.closed, ddof)) return self._apply(f, 'std', check_minp=_require_min_periods(1), ddof=ddof, **kwargs) @@ -1331,15 +1331,15 @@ def quantile(self, quantile, interpolation='linear', **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) if quantile == 1.0: - return libwindow.roll_max(arg, window, minp, indexi, - self.closed) + return _window.roll_max(arg, window, minp, indexi, + self.closed) elif quantile == 0.0: - return libwindow.roll_min(arg, window, minp, indexi, - self.closed) + return _window.roll_min(arg, window, minp, indexi, + self.closed) else: - return libwindow.roll_quantile(arg, window, minp, indexi, - self.closed, quantile, - interpolation) + return _window.roll_quantile(arg, window, minp, indexi, + self.closed, quantile, + interpolation) return self._apply(f, 'quantile', quantile=quantile, **kwargs) @@ -2265,7 +2265,7 @@ def _apply(self, func, **kwargs): cfunc = getattr(_window, func, None) if cfunc is None: raise ValueError("we do not support this function " - "in libwindow.{func}".format(func=func)) + "in _window.{0}".format(func)) def func(arg): return cfunc(arg, self.com, int(self.adjust), @@ -2300,9 +2300,9 @@ def var(self, bias=False, *args, **kwargs): nv.validate_window_func('var', args, kwargs) def f(arg): - return libwindow.ewmcov(arg, arg, self.com, int(self.adjust), - int(self.ignore_na), int(self.min_periods), - int(bias)) + return _window.ewmcov(arg, arg, self.com, int(self.adjust), + int(self.ignore_na), int(self.min_periods), + int(bias)) return self._apply(f, **kwargs) @@ -2320,10 +2320,9 @@ def cov(self, other=None, pairwise=None, bias=False, **kwargs): def _get_cov(X, Y): X = self._shallow_copy(X) Y = self._shallow_copy(Y) - cov = libwindow.ewmcov(X._prep_values(), Y._prep_values(), - self.com, int(self.adjust), - int(self.ignore_na), int(self.min_periods), - int(bias)) + cov = _window.ewmcov(X._prep_values(), Y._prep_values(), self.com, + int(self.adjust), int(self.ignore_na), + int(self.min_periods), int(bias)) return X._wrap_result(cov) return _flex_binary_moment(self._selected_obj, other._selected_obj, @@ -2345,10 +2344,10 @@ def _get_corr(X, Y): Y = self._shallow_copy(Y) def _cov(x, y): - return libwindow.ewmcov(x, y, self.com, int(self.adjust), - int(self.ignore_na), - int(self.min_periods), - 1) + return _window.ewmcov(x, y, self.com, int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + 1) x_values = X._prep_values() y_values = Y._prep_values() From 66c866bd8e00d5ed1e6f29a9fe6b1dc5eb26ae3e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 19 Nov 2018 10:29:25 -0800 Subject: [PATCH 03/16] implement remaining methods to fix dateoffset arithmetic with DTA/TDA --- pandas/_libs/tslibs/timestamps.pyx | 6 +- pandas/core/arrays/timedeltas.py | 40 +- pandas/core/indexes/timedeltas.py | 4 + pandas/tests/arithmetic/test_datetime64.py | 442 ++++++++++---------- pandas/tests/arithmetic/test_timedelta64.py | 4 - pandas/tseries/offsets.py | 50 ++- 6 files changed, 300 insertions(+), 246 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f6a6257f92e7c..0bbccaa622dde 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -376,13 +376,15 @@ cdef class _Timestamp(datetime): neg_other = -other return self + neg_other + typ = getattr(other, '_typ', None) + # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - elif getattr(other, '_typ', None) == 'datetimeindex': + if typ == ('datetimeindex', 'datetimearray'): # timezone comparison is performed in DatetimeIndex._sub_datelike return -other.__sub__(self) # a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex - elif getattr(other, '_typ', None) == 'timedeltaindex': + elif typ in ('timedeltaindex', 'timedeltaarray'): return (-other).__add__(self) elif other is NaT: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index cb630c9b66c20..b8f3340c4e3d5 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -4,7 +4,7 @@ import numpy as np -from pandas._libs import tslibs +from pandas._libs import algos, tslibs from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( @@ -27,7 +27,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.common as com -from pandas.core.algorithms import checked_add_with_arr +from pandas.core.algorithms import checked_add_with_arr, unique1d from pandas.tseries.offsets import Tick from pandas.tseries.frequencies import to_offset @@ -140,15 +140,31 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): result._freq = freq return result - def __new__(cls, values, freq=None, dtype=_TD_DTYPE): + def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): + verify_integrity = True freq, freq_infer = dtl.maybe_infer_freq(freq) - values = np.array(values, copy=False) - if values.dtype == np.object_: - values = array_to_timedelta64(values) + values, inferred_freq = sequence_to_td64ns(values, copy=copy, + unit=None) + if inferred_freq is not None: + if freq is not None and freq != inferred_freq: + raise ValueError('Inferred frequency {inferred} from passed ' + 'values does not conform to passed frequency ' + '{passed}' + .format(inferred=inferred_freq, + passed=freq.freqstr)) + elif freq_infer: + freq = inferred_freq + freq_infer = False + verify_integrity = False result = cls._simple_new(values, freq=freq) + # check that we are matching freqs + if verify_integrity and len(result) > 0: + if freq is not None and not freq_infer: + cls._validate_frequency(result, freq) + if freq_infer: result.freq = to_offset(result.inferred_freq) @@ -205,6 +221,18 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value + @property + def is_monotonic_increasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[0] + + @property + def is_monotonic_decreasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[1] + + @property + def is_unique(self): + return len(unique1d(self.asi8)) == len(self) + # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 4a0d1231444dc..bbb0f50c673e1 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -227,6 +227,10 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------- # Wrapping TimedeltaArray + is_monotonic_increasing = Index.is_monotonic_increasing + is_monotonic_decreasing = Index.is_monotonic_decreasing + is_unique = Index.is_unique + days = wrap_field_accessor(TimedeltaArray.days) seconds = wrap_field_accessor(TimedeltaArray.seconds) microseconds = wrap_field_accessor(TimedeltaArray.microseconds) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 873c7c92cbaf6..f3ad77713628c 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1058,9 +1058,228 @@ def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): # ------------------------------------------------------------- # RelativeDelta DateOffsets + def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array): + # GH#10699 + vec = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec + + # DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), ('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + off = pd.DateOffset(**dict([kwd])) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + + off = pd.DateOffset(**dict(relative_kwargs[:i + 1])) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + + with pytest.raises(TypeError): + off - vec + # ------------------------------------------------------------- # Non-Tick, Non-RelativeDelta DateOffsets + # TODO: redundant with test_dt64arr_add_sub_DateOffset? that includes + # tz-aware cases which this does not + @pytest.mark.parametrize('cls_and_kwargs', [ + 'YearBegin', ('YearBegin', {'month': 5}), + 'YearEnd', ('YearEnd', {'month': 5}), + 'MonthBegin', 'MonthEnd', + 'SemiMonthEnd', 'SemiMonthBegin', + 'Week', ('Week', {'weekday': 3}), + 'Week', ('Week', {'weekday': 6}), + 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', + 'CustomBusinessDay', 'CDay', 'CBMonthEnd', + 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', + 'BusinessHour', 'BYearBegin', 'BYearEnd', + 'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}), + ('FY5253Quarter', {'qtr_with_extra_week': 1, + 'startingMonth': 1, + 'weekday': 2, + 'variation': 'nearest'}), + ('FY5253', {'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}), + ('WeekOfMonth', {'weekday': 2, 'week': 2}), + 'Easter', ('DateOffset', {'day': 4}), + ('DateOffset', {'month': 5})]) + @pytest.mark.parametrize('normalize', [True, False]) + def test_dt64arr_add_sub_DateOffsets(self, box_with_array, + normalize, cls_and_kwargs): + # GH#10699 + # assert these are equal on a piecewise basis + vec = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), + Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), + Timestamp('2000-03-31'), + Timestamp('2000-02-29'), + Timestamp('2000-12-31'), + Timestamp('2000-05-15'), + Timestamp('2001-06-15')]) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec + + if isinstance(cls_and_kwargs, tuple): + # If cls_name param is a tuple, then 2nd entry is kwargs for + # the offset constructor + cls_name, kwargs = cls_and_kwargs + else: + cls_name = cls_and_kwargs + kwargs = {} + + offset_cls = getattr(pd.offsets, cls_name) + + with warnings.catch_warnings(record=True): + # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being + # applied to Series or DatetimeIndex + # we aren't testing that here, so ignore. + warnings.simplefilter("ignore", PerformanceWarning) + for n in [0, 5]: + if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253'] and n == 0): + # passing n = 0 is invalid for these offset classes + continue + + offset = offset_cls(n, normalize=normalize, **kwargs) + + expected = DatetimeIndex([x + offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + offset) + + expected = DatetimeIndex([x - offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - offset) + + expected = DatetimeIndex([offset + x for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, offset + vec) + + with pytest.raises(TypeError): + offset - vec + + def test_dt64arr_add_sub_DateOffset(self, box_with_array): + # GH#10699 + s = date_range('2000-01-01', '2000-01-31', name='a') + s = tm.box_expected(s, box_with_array) + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = date_range('2001-01-01', '2001-01-31', name='a') + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = date_range('1999-01-01', '1999-01-31', name='a') + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + # FIXME: ValueError with tzaware DataFrame transpose + s = tm.box_expected(s, box_with_array, transpose=False) + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = DatetimeIndex([Timestamp('2000-01-16 00:15:00', tz='US/Central'), + Timestamp('2000-02-16', tz='US/Central')], + name='a') + exp = tm.box_expected(exp, box_with_array, transpose=False) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + s = tm.box_expected(s, box_with_array, transpose=False) + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = DatetimeIndex([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], + name='a') + exp = tm.box_expected(exp, box_with_array, transpose=False) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + # TODO: __sub__, __rsub__ + def test_dt64arr_add_mixed_offset_array(self, box_with_array): + # GH#10699 + # array of offsets + s = DatetimeIndex([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + s = tm.box_expected(s, box_with_array) + + warn = None if box_with_array is pd.DataFrame else PerformanceWarning + with tm.assert_produces_warning(warn, + clear=[pd.core.arrays.datetimelike]): + other = pd.Index([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + other = tm.box_expected(other, box_with_array) + result = s + other + exp = DatetimeIndex([Timestamp('2001-1-1'), + Timestamp('2000-2-29')]) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + # same offset + other = pd.Index([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + other = tm.box_expected(other, box_with_array) + result = s + other + exp = DatetimeIndex([Timestamp('2001-1-1'), + Timestamp('2001-2-1')]) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + # TODO: overlap with test_dt64arr_add_mixed_offset_array? + def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, + box_with_array): + # GH#18849 + tz = tz_naive_fixture + dti = pd.date_range('2017-01-01', periods=2, tz=tz) + dtarr = tm.box_expected(dti, box_with_array) + + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + warn = None if box_with_array is pd.DataFrame else PerformanceWarning + with tm.assert_produces_warning(warn, + clear=[pd.core.arrays.datetimelike]): + res = dtarr + other + expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))], + name=dti.name, freq='infer') + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(warn, + clear=[pd.core.arrays.datetimelike]): + res2 = other + dtarr + tm.assert_equal(res2, expected) + + with tm.assert_produces_warning(warn, + clear=[pd.core.arrays.datetimelike]): + res = dtarr - other + expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))], + name=dti.name, freq='infer') + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(res, expected) + class TestDatetime64OverflowHandling(object): # TODO: box + de-duplicate @@ -1823,24 +2042,6 @@ def test_dti_add_series(self, tz, names): result4 = index + ser.values tm.assert_index_equal(result4, expected) - def test_dti_add_offset_array(self, tz_naive_fixture): - # GH#18849 - tz = tz_naive_fixture - dti = pd.date_range('2017-01-01', periods=2, tz=tz) - other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) - - with tm.assert_produces_warning(PerformanceWarning, - clear=[pd.core.arrays.datetimelike]): - res = dti + other - expected = DatetimeIndex([dti[n] + other[n] for n in range(len(dti))], - name=dti.name, freq='infer') - tm.assert_index_equal(res, expected) - - with tm.assert_produces_warning(PerformanceWarning, - clear=[pd.core.arrays.datetimelike]): - res2 = other + dti - tm.assert_index_equal(res2, expected) - @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) @@ -1863,19 +2064,6 @@ def test_dti_add_offset_index(self, tz_naive_fixture, names): res2 = other + dti tm.assert_index_equal(res2, expected) - def test_dti_sub_offset_array(self, tz_naive_fixture): - # GH#18824 - tz = tz_naive_fixture - dti = pd.date_range('2017-01-01', periods=2, tz=tz) - other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) - - with tm.assert_produces_warning(PerformanceWarning, - clear=[pd.core.arrays.datetimelike]): - res = dti - other - expected = DatetimeIndex([dti[n] - other[n] for n in range(len(dti))], - name=dti.name, freq='infer') - tm.assert_index_equal(res, expected) - @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) @@ -1925,198 +2113,6 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names): tm.assert_series_equal(res3, expected_sub) -def test_dt64_with_offset_array(box_with_array): - # GH#10699 - # array of offsets - s = DatetimeIndex([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) - s = tm.box_expected(s, box_with_array) - - warn = PerformanceWarning if box_with_array is not pd.DataFrame else None - with tm.assert_produces_warning(warn, - clear=[pd.core.arrays.datetimelike]): - other = pd.Index([pd.offsets.DateOffset(years=1), - pd.offsets.MonthEnd()]) - other = tm.box_expected(other, box_with_array) - result = s + other - exp = DatetimeIndex([Timestamp('2001-1-1'), Timestamp('2000-2-29')]) - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - - # same offset - other = pd.Index([pd.offsets.DateOffset(years=1), - pd.offsets.DateOffset(years=1)]) - other = tm.box_expected(other, box_with_array) - result = s + other - exp = DatetimeIndex([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - - -def test_dt64_with_DateOffsets_relativedelta(box_with_array): - # GH#10699 - if box_with_array is tm.to_array: - pytest.xfail("apply_index implementations are Index-specific") - - vec = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), - Timestamp('2000-01-31 00:23:00'), - Timestamp('2000-01-01'), - Timestamp('2000-03-31'), - Timestamp('2000-02-29'), - Timestamp('2000-12-31'), - Timestamp('2000-05-15'), - Timestamp('2001-06-15')]) - vec = tm.box_expected(vec, box_with_array) - vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec - - # DateOffset relativedelta fastpath - relative_kwargs = [('years', 2), ('months', 5), ('days', 3), - ('hours', 5), ('minutes', 10), ('seconds', 2), - ('microseconds', 5)] - for i, kwd in enumerate(relative_kwargs): - off = pd.DateOffset(**dict([kwd])) - - expected = DatetimeIndex([x + off for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec + off) - - expected = DatetimeIndex([x - off for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec - off) - - off = pd.DateOffset(**dict(relative_kwargs[:i + 1])) - - expected = DatetimeIndex([x + off for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec + off) - - expected = DatetimeIndex([x - off for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec - off) - - with pytest.raises(TypeError): - off - vec - - -@pytest.mark.parametrize('cls_and_kwargs', [ - 'YearBegin', ('YearBegin', {'month': 5}), - 'YearEnd', ('YearEnd', {'month': 5}), - 'MonthBegin', 'MonthEnd', - 'SemiMonthEnd', 'SemiMonthBegin', - 'Week', ('Week', {'weekday': 3}), - 'Week', ('Week', {'weekday': 6}), - 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', - 'CustomBusinessDay', 'CDay', 'CBMonthEnd', - 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', - 'BusinessHour', 'BYearBegin', 'BYearEnd', - 'BQuarterBegin', ('LastWeekOfMonth', {'weekday': 2}), - ('FY5253Quarter', {'qtr_with_extra_week': 1, - 'startingMonth': 1, - 'weekday': 2, - 'variation': 'nearest'}), - ('FY5253', {'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}), - ('WeekOfMonth', {'weekday': 2, 'week': 2}), - 'Easter', ('DateOffset', {'day': 4}), - ('DateOffset', {'month': 5})]) -@pytest.mark.parametrize('normalize', [True, False]) -def test_dt64_with_DateOffsets(box_with_array, normalize, cls_and_kwargs): - # GH#10699 - # assert these are equal on a piecewise basis - if box_with_array is tm.to_array: - pytest.xfail("apply_index implementations are Index-specific") - - vec = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), - Timestamp('2000-01-31 00:23:00'), - Timestamp('2000-01-01'), - Timestamp('2000-03-31'), - Timestamp('2000-02-29'), - Timestamp('2000-12-31'), - Timestamp('2000-05-15'), - Timestamp('2001-06-15')]) - vec = tm.box_expected(vec, box_with_array) - vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec - - if isinstance(cls_and_kwargs, tuple): - # If cls_name param is a tuple, then 2nd entry is kwargs for - # the offset constructor - cls_name, kwargs = cls_and_kwargs - else: - cls_name = cls_and_kwargs - kwargs = {} - - offset_cls = getattr(pd.offsets, cls_name) - - with warnings.catch_warnings(record=True): - # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being - # applied to Series or DatetimeIndex - # we aren't testing that here, so ignore. - warnings.simplefilter("ignore", PerformanceWarning) - for n in [0, 5]: - if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth', - 'FY5253Quarter', 'FY5253'] and n == 0): - # passing n = 0 is invalid for these offset classes - continue - - offset = offset_cls(n, normalize=normalize, **kwargs) - - expected = DatetimeIndex([x + offset for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec + offset) - - expected = DatetimeIndex([x - offset for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec - offset) - - expected = DatetimeIndex([offset + x for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, offset + vec) - - with pytest.raises(TypeError): - offset - vec - - -def test_datetime64_with_DateOffset(box_with_array): - # GH#10699 - if box_with_array is tm.to_array: - pytest.xfail("DateOffset.apply_index uses _shallow_copy") - - s = date_range('2000-01-01', '2000-01-31', name='a') - s = tm.box_expected(s, box_with_array) - result = s + pd.DateOffset(years=1) - result2 = pd.DateOffset(years=1) + s - exp = date_range('2001-01-01', '2001-01-31', name='a') - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - tm.assert_equal(result2, exp) - - result = s - pd.DateOffset(years=1) - exp = date_range('1999-01-01', '1999-01-31', name='a') - exp = tm.box_expected(exp, box_with_array) - tm.assert_equal(result, exp) - - s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - # FIXME: ValueError with tzaware DataFrame transpose - s = tm.box_expected(s, box_with_array, transpose=False) - result = s + pd.offsets.Day() - result2 = pd.offsets.Day() + s - exp = DatetimeIndex([Timestamp('2000-01-16 00:15:00', tz='US/Central'), - Timestamp('2000-02-16', tz='US/Central')], name='a') - exp = tm.box_expected(exp, box_with_array, transpose=False) - tm.assert_equal(result, exp) - tm.assert_equal(result2, exp) - - s = DatetimeIndex([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - s = tm.box_expected(s, box_with_array, transpose=False) - result = s + pd.offsets.MonthEnd() - result2 = pd.offsets.MonthEnd() + s - exp = DatetimeIndex([Timestamp('2000-01-31 00:15:00', tz='US/Central'), - Timestamp('2000-02-29', tz='US/Central')], name='a') - exp = tm.box_expected(exp, box_with_array, transpose=False) - tm.assert_equal(result, exp) - tm.assert_equal(result2, exp) - - @pytest.mark.parametrize('years', [-1, 0, 1]) @pytest.mark.parametrize('months', [-2, 0, 2]) def test_shift_months(years, months): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 07c48554c65b8..f5a088abd014e 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -437,10 +437,6 @@ def test_td64arr_add_timestamp(self, box_with_array, tz_naive_fixture): def test_td64arr_add_sub_timestamp(self, box_with_array): # GH#11925 - if box_with_array is tm.to_array: - pytest.xfail("DatetimeArray.__sub__ returns ndarray instead " - "of TimedeltaArray") - ts = Timestamp('2012-01-01') # TODO: parametrize over types of datetime scalar? diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 440a6a3558d9e..67bb569f1495f 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -284,14 +284,17 @@ def apply_index(self, i): kwds.get('months', 0)) * self.n) if months: shifted = liboffsets.shift_months(i.asi8, months) - i = i._shallow_copy(shifted) + i = type(i)(shifted, freq=i.freq, dtype=i.dtype) weeks = (kwds.get('weeks', 0)) * self.n if weeks: # integer addition on PeriodIndex is deprecated, # so we directly use _time_shift instead asper = i.to_period('W') - shifted = asper._data._time_shift(weeks) + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + shifted = asper._time_shift(weeks) i = shifted.to_timestamp() + i.to_perioddelta('W') timedelta_kwds = {k: v for k, v in kwds.items() @@ -540,17 +543,21 @@ def apply_index(self, i): # to_period rolls forward to next BDay; track and # reduce n where it does when rolling forward asper = i.to_period('B') + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + if self.n > 0: shifted = (i.to_perioddelta('B') - time).asi8 != 0 # Integer-array addition is deprecated, so we use # _time_shift directly roll = np.where(shifted, self.n - 1, self.n) - shifted = asper._data._addsub_int_array(roll, operator.add) + shifted = asper._addsub_int_array(roll, operator.add) else: # Integer addition is deprecated, so we use _time_shift directly roll = self.n - shifted = asper._data._time_shift(roll) + shifted = asper._time_shift(roll) result = shifted.to_timestamp() + time return result @@ -923,7 +930,9 @@ def apply(self, other): @apply_index_wraps def apply_index(self, i): shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) - return i._shallow_copy(shifted) + # TODO: going through __new__ raises on call to _validate_frequency; + # are we passing incorrect freq? + return type(i)._simple_new(shifted, freq=i.freq, dtype=i.dtype) class MonthEnd(MonthOffset): @@ -1123,7 +1132,11 @@ def apply_index(self, i): # integer-array addition on PeriodIndex is deprecated, # so we use _addsub_int_array directly asper = i.to_period('M') - shifted = asper._data._addsub_int_array(roll // 2, operator.add) + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + + shifted = asper._addsub_int_array(roll // 2, operator.add) i = type(dti)(shifted.to_timestamp()) # apply the correct day @@ -1307,7 +1320,12 @@ def apply_index(self, i): if self.weekday is None: # integer addition on PeriodIndex is deprecated, # so we use _time_shift directly - shifted = i.to_period('W')._data._time_shift(self.n) + asper = i.to_period('W') + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + + shifted = asper._time_shift(self.n) return shifted.to_timestamp() + i.to_perioddelta('W') else: return self._end_apply_index(i) @@ -1328,6 +1346,10 @@ def _end_apply_index(self, dtindex): base, mult = libfrequencies.get_freq_code(self.freqstr) base_period = dtindex.to_period(base) + if not isinstance(base_period._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + base_period = base_period._data + if self.n > 0: # when adding, dates on end roll to next normed = dtindex - off + Timedelta(1, 'D') - Timedelta(1, 'ns') @@ -1335,13 +1357,13 @@ def _end_apply_index(self, dtindex): self.n, self.n - 1) # integer-array addition on PeriodIndex is deprecated, # so we use _addsub_int_array directly - shifted = base_period._data._addsub_int_array(roll, operator.add) + shifted = base_period._addsub_int_array(roll, operator.add) base = shifted.to_timestamp(how='end') else: # integer addition on PeriodIndex is deprecated, # so we use _time_shift directly roll = self.n - base = base_period._data._time_shift(roll).to_timestamp(how='end') + base = base_period._time_shift(roll).to_timestamp(how='end') return base + off + Timedelta(1, 'ns') - Timedelta(1, 'D') @@ -1591,7 +1613,10 @@ def onOffset(self, dt): def apply_index(self, dtindex): shifted = liboffsets.shift_quarters(dtindex.asi8, self.n, self.startingMonth, self._day_opt) - return dtindex._shallow_copy(shifted) + # TODO: going through __new__ raises on call to _validate_frequency; + # are we passing incorrect freq? + return type(dtindex)._simple_new(shifted, freq=dtindex.freq, + dtype=dtindex.dtype) class BQuarterEnd(QuarterOffset): @@ -1662,7 +1687,10 @@ def apply_index(self, dtindex): shifted = liboffsets.shift_quarters(dtindex.asi8, self.n, self.month, self._day_opt, modby=12) - return dtindex._shallow_copy(shifted) + # TODO: going through __new__ raises on call to _validate_frequency; + # are we passing incorrect freq? + return type(dtindex)._simple_new(shifted, freq=dtindex.freq, + dtype=dtindex.dtype) def onOffset(self, dt): if self.normalize and not _is_normalized(dt): From 4dc17e2f0fb0565283bbfc3d1dc7657a1abd7651 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 19 Nov 2018 13:26:38 -0800 Subject: [PATCH 04/16] xfail dataframe case --- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/tests/arithmetic/test_datetime64.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0bbccaa622dde..7b83a9c723669 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -379,7 +379,7 @@ cdef class _Timestamp(datetime): typ = getattr(other, '_typ', None) # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - if typ == ('datetimeindex', 'datetimearray'): + if typ in ('datetimeindex', 'datetimearray'): # timezone comparison is performed in DatetimeIndex._sub_datelike return -other.__sub__(self) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index f3ad77713628c..89edadbba2cb6 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1252,6 +1252,10 @@ def test_dt64arr_add_mixed_offset_array(self, box_with_array): def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array): # GH#18849 + if box_with_array is pd.DataFrame: + pytest.xfail("FIXME: ValueError with transpose; " + "alignment error without") + tz = tz_naive_fixture dti = pd.date_range('2017-01-01', periods=2, tz=tz) dtarr = tm.box_expected(dti, box_with_array) From dd7e87396fef2a30c7d6018c0fd305c1bdd5e0a0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 19 Nov 2018 18:05:56 -0800 Subject: [PATCH 05/16] dont use verify_integrity, push one more level of test into parametrize --- pandas/core/arrays/timedeltas.py | 8 ++-- pandas/tests/arithmetic/test_datetime64.py | 56 +++++++++++----------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 5b5870a0d8ea7..d2eb04f6e0eaa 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -134,12 +134,11 @@ def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): return result def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): - verify_integrity = True freq, freq_infer = dtl.maybe_infer_freq(freq) - values, inferred_freq = sequence_to_td64ns(values, copy=copy, - unit=None) + values, inferred_freq = sequence_to_td64ns( + values, copy=copy, unit=None) if inferred_freq is not None: if freq is not None and freq != inferred_freq: raise ValueError('Inferred frequency {inferred} from passed ' @@ -150,11 +149,10 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): elif freq_infer: freq = inferred_freq freq_infer = False - verify_integrity = False result = cls._simple_new(values, freq=freq) # check that we are matching freqs - if verify_integrity and len(result) > 0: + if inferred_freq is None and len(result) > 0: if freq is not None and not freq_infer: cls._validate_frequency(result, freq) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 89edadbba2cb6..57cf23a39a944 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1125,10 +1125,25 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array): 'Easter', ('DateOffset', {'day': 4}), ('DateOffset', {'month': 5})]) @pytest.mark.parametrize('normalize', [True, False]) + @pytest.mark.parametrize('n', [0, 5]) def test_dt64arr_add_sub_DateOffsets(self, box_with_array, - normalize, cls_and_kwargs): + n, normalize, cls_and_kwargs): # GH#10699 - # assert these are equal on a piecewise basis + # assert vectorized operation matches pointwise operations + + if isinstance(cls_and_kwargs, tuple): + # If cls_name param is a tuple, then 2nd entry is kwargs for + # the offset constructor + cls_name, kwargs = cls_and_kwargs + else: + cls_name = cls_and_kwargs + kwargs = {} + + if n == 0 and cls_name in ['WeekOfMonth', 'LastWeekOfMonth', + 'FY5253Quarter', 'FY5253']: + # passing n = 0 is invalid for these offset classes + return + vec = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp('2000-01-31 00:23:00'), Timestamp('2000-01-01'), @@ -1140,14 +1155,6 @@ def test_dt64arr_add_sub_DateOffsets(self, box_with_array, vec = tm.box_expected(vec, box_with_array) vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec - if isinstance(cls_and_kwargs, tuple): - # If cls_name param is a tuple, then 2nd entry is kwargs for - # the offset constructor - cls_name, kwargs = cls_and_kwargs - else: - cls_name = cls_and_kwargs - kwargs = {} - offset_cls = getattr(pd.offsets, cls_name) with warnings.catch_warnings(record=True): @@ -1155,28 +1162,23 @@ def test_dt64arr_add_sub_DateOffsets(self, box_with_array, # applied to Series or DatetimeIndex # we aren't testing that here, so ignore. warnings.simplefilter("ignore", PerformanceWarning) - for n in [0, 5]: - if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth', - 'FY5253Quarter', 'FY5253'] and n == 0): - # passing n = 0 is invalid for these offset classes - continue - offset = offset_cls(n, normalize=normalize, **kwargs) + offset = offset_cls(n, normalize=normalize, **kwargs) - expected = DatetimeIndex([x + offset for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec + offset) + expected = DatetimeIndex([x + offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + offset) - expected = DatetimeIndex([x - offset for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, vec - offset) + expected = DatetimeIndex([x - offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - offset) - expected = DatetimeIndex([offset + x for x in vec_items]) - expected = tm.box_expected(expected, box_with_array) - tm.assert_equal(expected, offset + vec) + expected = DatetimeIndex([offset + x for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, offset + vec) - with pytest.raises(TypeError): - offset - vec + with pytest.raises(TypeError): + offset - vec def test_dt64arr_add_sub_DateOffset(self, box_with_array): # GH#10699 From 23a25d138bdd0a285d49db42c9690003f061ace5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 20 Nov 2018 13:01:10 -0800 Subject: [PATCH 06/16] fix broken tests --- pandas/core/arrays/timedeltas.py | 5 +++-- pandas/tests/arithmetic/test_timedelta64.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a56bd20f08e55..e7dd6e6dcaf3e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -175,7 +175,8 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): '{passed}' .format(inferred=inferred_freq, passed=freq.freqstr)) - elif freq_infer: + elif freq is None: + # TODO: should this be the stronger condition `if freq_infer`? freq = inferred_freq freq_infer = False @@ -307,7 +308,7 @@ def _add_datetimelike_scalar(self, other): result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan) result = self._maybe_mask_results(result) - return DatetimeArrayMixin(result, tz=other.tz) + return DatetimeArrayMixin(result, tz=other.tz, freq=self.freq) def _addsub_offset_array(self, other, op): # Add or subtract Array-like of DateOffset objects diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index f38785e67ce9f..2b300cb101201 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1101,6 +1101,7 @@ def test_tdi_rmul_arraylike(self, other, box_with_array): tdi = TimedeltaIndex(['1 Day'] * 10) expected = timedelta_range('1 days', '10 days') + expected._freq = None tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, xbox) From b4ae28818bfe67bef09ffafc44b24fd0804f2140 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 22 Nov 2018 09:01:25 -0800 Subject: [PATCH 07/16] comment clarification --- pandas/core/arrays/timedeltas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e7dd6e6dcaf3e..28e151508732a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -177,6 +177,8 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): passed=freq.freqstr)) elif freq is None: # TODO: should this be the stronger condition `if freq_infer`? + # i.e what if the user passed `freq=None` and specifically + # wanted freq=None in the result? freq = inferred_freq freq_infer = False From 711ee61014d5f546c68a1cbcbfbfe77be93d56b1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 22 Nov 2018 12:21:22 -0800 Subject: [PATCH 08/16] dummy commit to force CI From 5fbe9c8483b7730d4882da10091d47427f11fd8c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 24 Nov 2018 19:07:05 -0800 Subject: [PATCH 09/16] comments about cached and non-cached implementations --- pandas/core/arrays/timedeltas.py | 13 +++++++------ pandas/core/indexes/timedeltas.py | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 28e151508732a..4cc9fc31ae64d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -176,9 +176,6 @@ def __new__(cls, values, freq=None, dtype=_TD_DTYPE, copy=False): .format(inferred=inferred_freq, passed=freq.freqstr)) elif freq is None: - # TODO: should this be the stronger condition `if freq_infer`? - # i.e what if the user passed `freq=None` and specifically - # wanted freq=None in the result? freq = inferred_freq freq_infer = False @@ -244,15 +241,19 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value - @property + # is_monotonic_increasing, is_monotonic_decreasing, and is_unique + # are needed by `frequencies.infer_freq`, which is called when accessing + # the `inferred_freq` property inside the TimedeltaArray constructor + + @property # NB: override with cache_readonly in immutable subclasses def is_monotonic_increasing(self): return algos.is_monotonic(self.asi8, timelike=True)[0] - @property + @property # NB: override with cache_readonly in immutable subclasses def is_monotonic_decreasing(self): return algos.is_monotonic(self.asi8, timelike=True)[1] - @property + @property # NB: override with cache_readonly in immutable subclasses def is_unique(self): return len(unique1d(self.asi8)) == len(self) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 3ac0e6acb95b6..1c2eab97d7204 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -227,6 +227,8 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------- # Wrapping TimedeltaArray + # override non-caching implementations from TimedeltaArray with + # _engine-based implementations that take advantage of Index immutability is_monotonic_increasing = Index.is_monotonic_increasing is_monotonic_decreasing = Index.is_monotonic_decreasing is_unique = Index.is_unique From a4f9733ded9d72b913398b1b181f59bf9e39517f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 26 Nov 2018 09:16:05 -0800 Subject: [PATCH 10/16] coverage --- pandas/tests/indexes/timedeltas/test_construction.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 074c8904b55b1..125f85ef27f13 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -6,6 +6,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import TimedeltaIndex, timedelta_range, to_timedelta, Timedelta +from pandas.core.arrays import TimedeltaArrayMixin as TimdeltaArray class TestTimedeltaIndex(object): @@ -41,6 +42,10 @@ def test_infer_from_tdi_mismatch(self): with pytest.raises(ValueError, match=msg): TimedeltaIndex(tdi, freq='D') + with pytest.raises(ValueError, match=msg): + # GH#23789 + TimedeltaArray(tdi, freq='D') + def test_dt64_data_invalid(self): # GH#23539 # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64] From b50fedf5933089afd45f311d3cd6e768edf6d59a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 26 Nov 2018 09:17:06 -0800 Subject: [PATCH 11/16] special casing in freq_infer --- pandas/core/arrays/timedeltas.py | 20 ++------------------ pandas/tseries/frequencies.py | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4cc9fc31ae64d..e1a4e390980d9 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import algos, tslibs +from pandas._libs import tslibs from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ops -from pandas.core.algorithms import checked_add_with_arr, unique1d +from pandas.core.algorithms import checked_add_with_arr import pandas.core.common as com from pandas.tseries.frequencies import to_offset @@ -241,22 +241,6 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value - # is_monotonic_increasing, is_monotonic_decreasing, and is_unique - # are needed by `frequencies.infer_freq`, which is called when accessing - # the `inferred_freq` property inside the TimedeltaArray constructor - - @property # NB: override with cache_readonly in immutable subclasses - def is_monotonic_increasing(self): - return algos.is_monotonic(self.asi8, timelike=True)[0] - - @property # NB: override with cache_readonly in immutable subclasses - def is_monotonic_decreasing(self): - return algos.is_monotonic(self.asi8, timelike=True)[1] - - @property # NB: override with cache_readonly in immutable subclasses - def is_unique(self): - return len(unique1d(self.asi8)) == len(self) - # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 97ef91a02dfb8..c25f47805e37c 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -5,7 +5,7 @@ import numpy as np from pytz import AmbiguousTimeError -from pandas._libs.algos import unique_deltas +from pandas._libs.algos import is_monotonic, unique_deltas from pandas._libs.tslibs import Timedelta, Timestamp from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, int_to_weekday from pandas._libs.tslibs.conversion import tz_convert @@ -295,8 +295,19 @@ def __init__(self, index, warn=True): if len(index) < 3: raise ValueError('Need at least 3 dates to infer frequency') - self.is_monotonic = (self.index.is_monotonic_increasing or - self.index.is_monotonic_decreasing) + if not hasattr(index, "is_monotonic_increasing"): + # i.e. TimedeltaArray, not TimedeltaIndex + increasing, decreasing, strict = is_monotonic(index.asi8, + timelike=True) + self.is_monotonic = increasing or decreasing + self.strictly_monotonic = strict + else: + self.is_monotonic = (index.is_monotonic_increasing or + index.is_monotonic_decreasing) + strict = False + if self.is_monotonic and index.is_unique: + strict = True + self.strictly_monotonic = strict @cache_readonly def deltas(self): @@ -323,7 +334,7 @@ def get_freq(self): # noqa:F811 ------- freqstr : str or None """ - if not self.is_monotonic or not self.index.is_unique: + if not self.strictly_monotonic: return None delta = self.deltas[0] From 7e951e4d5e5ac76247f1f6fc3505be46e71595dd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 26 Nov 2018 09:17:37 -0800 Subject: [PATCH 12/16] special casing followup --- pandas/core/indexes/timedeltas.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1c2eab97d7204..8f50b40a20738 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -227,12 +227,6 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------- # Wrapping TimedeltaArray - # override non-caching implementations from TimedeltaArray with - # _engine-based implementations that take advantage of Index immutability - is_monotonic_increasing = Index.is_monotonic_increasing - is_monotonic_decreasing = Index.is_monotonic_decreasing - is_unique = Index.is_unique - __mul__ = Index.__mul__ __rmul__ = Index.__rmul__ __truediv__ = Index.__truediv__ From 317e1e729557ec02361f7796dfb8e21c363fa240 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 26 Nov 2018 09:59:23 -0800 Subject: [PATCH 13/16] fix simple_new args --- pandas/tseries/offsets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index df22091c8898f..d19eca8c14493 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -921,7 +921,7 @@ def apply_index(self, i): shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? - return type(i)._simple_new(shifted, freq=i.freq, dtype=i.dtype) + return type(i)._simple_new(shifted, freq=i.freq, tz=i.tz) class MonthEnd(MonthOffset): @@ -1604,7 +1604,7 @@ def apply_index(self, dtindex): # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? return type(dtindex)._simple_new(shifted, freq=dtindex.freq, - dtype=dtindex.dtype) + tz=dtindex.tz) class BQuarterEnd(QuarterOffset): @@ -1678,7 +1678,7 @@ def apply_index(self, dtindex): # TODO: going through __new__ raises on call to _validate_frequency; # are we passing incorrect freq? return type(dtindex)._simple_new(shifted, freq=dtindex.freq, - dtype=dtindex.dtype) + tz=dtindex.tz) def onOffset(self, dt): if self.normalize and not _is_normalized(dt): From 5433a71bd10658a1b1119142aeb1f5c7ff3d8ae3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 27 Nov 2018 06:58:31 -0800 Subject: [PATCH 14/16] privatize --- pandas/core/arrays/timedeltas.py | 19 +++++++++++++++++-- pandas/core/indexes/timedeltas.py | 5 +++++ pandas/tseries/frequencies.py | 19 ++++--------------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index c73f383070a6f..83cea51cec9f6 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import tslibs +from pandas._libs import algos, tslibs from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ops -from pandas.core.algorithms import checked_add_with_arr +from pandas.core.algorithms import checked_add_with_arr, unique1d import pandas.core.common as com from pandas.tseries.frequencies import to_offset @@ -241,6 +241,21 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value + # monotonicity/uniqueness properties are called via frequencies.infer_freq, + # see GH#23789 + + @property + def _is_monotonic_increasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[0] + + @property + def _is_monotonic_decreasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[1] + + @property + def _is_unique(self): + return len(unique1d(self.asi8)) == len(self) + # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 8f50b40a20738..cb7da9129bebe 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -242,6 +242,11 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): total_seconds = wrap_array_method(TimedeltaArray.total_seconds, True) + # Compat for frequency inference, see GH#23789 + _is_monotonic_increasing = Index.is_monotonic_increasing + _is_monotonic_decreasing = Index.is_monotonic_decreasing + _is_unique = Index.is_unique + # ------------------------------------------------------------------- @Appender(_index_shared_docs['astype']) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c25f47805e37c..8cdec31d7ce8a 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -5,7 +5,7 @@ import numpy as np from pytz import AmbiguousTimeError -from pandas._libs.algos import is_monotonic, unique_deltas +from pandas._libs.algos import unique_deltas from pandas._libs.tslibs import Timedelta, Timestamp from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, int_to_weekday from pandas._libs.tslibs.conversion import tz_convert @@ -295,19 +295,8 @@ def __init__(self, index, warn=True): if len(index) < 3: raise ValueError('Need at least 3 dates to infer frequency') - if not hasattr(index, "is_monotonic_increasing"): - # i.e. TimedeltaArray, not TimedeltaIndex - increasing, decreasing, strict = is_monotonic(index.asi8, - timelike=True) - self.is_monotonic = increasing or decreasing - self.strictly_monotonic = strict - else: - self.is_monotonic = (index.is_monotonic_increasing or - index.is_monotonic_decreasing) - strict = False - if self.is_monotonic and index.is_unique: - strict = True - self.strictly_monotonic = strict + self.is_monotonic = (self.index._is_monotonic_increasing or + self.index._is_monotonic_decreasing) @cache_readonly def deltas(self): @@ -334,7 +323,7 @@ def get_freq(self): # noqa:F811 ------- freqstr : str or None """ - if not self.strictly_monotonic: + if not self.is_monotonic or not self.index._is_unique: return None delta = self.deltas[0] From 2c65f3bf05b9f9fdbaf010cace677f63405a861a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 27 Nov 2018 08:39:02 -0800 Subject: [PATCH 15/16] more kludges --- pandas/core/indexes/datetimes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 61e8d6344a0e9..f8d573c528222 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1128,6 +1128,11 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- # Wrapping DatetimeArray + # Compat for frequency inference, see GH#23789 + _is_monotonic_increasing = Index.is_monotonic_increasing + _is_monotonic_decreasing = Index.is_monotonic_decreasing + _is_unique = Index.is_unique + _timezone = cache_readonly(DatetimeArray._timezone.fget) is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) _resolution = cache_readonly(DatetimeArray._resolution.fget) From c3d775e3c8d5eb42a949f7a3b994cceb4ee508f2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 27 Nov 2018 10:52:27 -0800 Subject: [PATCH 16/16] typo fixup --- pandas/tests/indexes/timedeltas/test_construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 125f85ef27f13..ba20febfeafad 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -6,7 +6,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import TimedeltaIndex, timedelta_range, to_timedelta, Timedelta -from pandas.core.arrays import TimedeltaArrayMixin as TimdeltaArray +from pandas.core.arrays import TimedeltaArrayMixin as TimedeltaArray class TestTimedeltaIndex(object):