diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a98b0b3bf35f9..df5440006ec36 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -11,7 +11,8 @@ from pandas._libs.tslibs.period import ( Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) -from pandas.errors import NullFrequencyError, PerformanceWarning +from pandas.errors import ( + NullFrequencyError, PerformanceWarning, AbstractMethodError) from pandas import compat from pandas.tseries import frequencies @@ -36,7 +37,7 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas.core.common as com -from pandas.core.algorithms import checked_add_with_arr +from pandas.core.algorithms import checked_add_with_arr, take from .base import ExtensionOpsMixin from pandas.util._decorators import deprecate_kwarg @@ -77,12 +78,10 @@ class AttributesMixin(object): @property def _attributes(self): # Inheriting subclass should implement _attributes as a list of strings - from pandas.errors import AbstractMethodError raise AbstractMethodError(self) @classmethod def _simple_new(cls, values, **kwargs): - from pandas.errors import AbstractMethodError raise AbstractMethodError(cls) def _get_attributes_dict(self): @@ -119,7 +118,7 @@ def _box_func(self): """ box function to get object from internal representation """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _box_values(self, values): """ @@ -140,6 +139,67 @@ def asi8(self): # do not cache or you'll create a memory leak return self.values.view('i8') + # ------------------------------------------------------------------ + # Extension Array Interface + # TODO: + # _from_sequence + # _from_factorized + # __setitem__ + # _values_for_argsort + # argsort + # fillna + # dropna + # shift + # unique + # _values_for_factorize + # factorize + # _formatting_values + # _reduce + # copy + + def _validate_fill_value(self, fill_value): + """ + If a fill_value is passed to `take` convert it to an i8 representation, + raising ValueError if this is not possible. + + Parameters + ---------- + fill_value : object + + Returns + ------- + fill_value : np.int64 + + Raises + ------ + ValueError + """ + raise AbstractMethodError(self) + + def take(self, indices, allow_fill=False, fill_value=None): + + if allow_fill: + fill_value = self._validate_fill_value(fill_value) + + new_values = take(self._data, + indices, + allow_fill=allow_fill, + fill_value=fill_value) + + # TODO: use "infer"? Why does not passing freq cause + # failures in py37 but not py27? + freq = self.freq if is_period_dtype(self) else None + return self._shallow_copy(new_values, freq=freq) + + @classmethod + def _concat_same_type(cls, to_concat): + # for TimedeltaArray and PeriodArray; DatetimeArray overrides + freqs = {x.freq for x in to_concat} + assert len(freqs) == 1 + freq = list(freqs)[0] + values = np.concatenate([x._data for x in to_concat]) + return cls._simple_new(values, freq=freq) + # ------------------------------------------------------------------ # Array-like Methods @@ -211,6 +271,10 @@ def astype(self, dtype, copy=True): # ------------------------------------------------------------------ # Null Handling + def isna(self): + # EA Interface + return self._isnan + @property # NB: override with cache_readonly in immutable subclasses def _isnan(self): """ return if each value is nan""" @@ -352,13 +416,13 @@ def _add_datelike(self, other): typ=type(other).__name__)) def _sub_datelike(self, other): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _sub_period(self, other): return NotImplemented def _add_offset(self, offset): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _add_delta(self, other): return NotImplemented @@ -380,7 +444,7 @@ def _add_delta_tdi(self, other): Add a delta of a TimedeltaIndex return the i8 result view """ - if not len(self) == len(other): + if len(self) != len(other): raise ValueError("cannot add indices of unequal length") if isinstance(other, np.ndarray): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4cc33d7afd6c8..248cc7e8dcdd5 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -12,7 +12,7 @@ conversion, fields, timezones, resolution as libresolution) -from pandas.util._decorators import cache_readonly +from pandas.util._decorators import cache_readonly, Appender from pandas.errors import PerformanceWarning from pandas import compat @@ -298,6 +298,35 @@ def _generate_range(cls, start, end, periods, freq, tz=None, return cls._simple_new(index.values, freq=freq, tz=tz) + # ---------------------------------------------------------------- + # Extension Array Interface + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (datetime, np.datetime64)): + self._assert_tzawareness_compat(fill_value) + fill_value = Timestamp(fill_value).value + else: + raise ValueError("'fill_value' should be a Timestamp. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + + @classmethod + def _concat_same_type(cls, to_concat): + # for TimedeltaArray and PeriodArray; DatetimeArray requires tz + freqs = {x.freq for x in to_concat} + assert len(freqs) == 1 + freq = list(freqs)[0] + + tzs = {x.tz for x in to_concat} + assert len(tzs) == 1 + tz = list(tzs)[0] + + values = np.concatenate([x._data for x in to_concat]) + return cls._simple_new(values, freq=freq, tz=tz) + # ----------------------------------------------------------------- # Descriptive Properties diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8624ddd8965e8..655f50d6d04cf 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -15,13 +15,14 @@ from pandas._libs.tslibs.fields import isleapyear_arr from pandas import compat -from pandas.util._decorators import (cache_readonly, deprecate_kwarg) +from pandas.util._decorators import cache_readonly, deprecate_kwarg, Appender from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, is_timedelta64_dtype, is_datetime64_dtype, _TD_DTYPE) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.missing import isna import pandas.core.common as com @@ -193,6 +194,23 @@ def _generate_range(cls, start, end, periods, freq, fields): return subarr, freq + # -------------------------------------------------------------------- + # ExtensionArray Interface + + @Appender(DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, Period): + if fill_value.freq != self.freq: + raise ValueError("'fill_value' freq must match own " + "freq ({freq})".format(freq=self.freq)) + fill_value = fill_value.ordinal + else: + raise ValueError("'fill_value' should be a Period. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + # -------------------------------------------------------------------- # Vectorized analogues of Period properties diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4904a90ab7b2b..d55ff1a41bc93 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -8,6 +8,8 @@ from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas.util._decorators import Appender + from pandas import compat from pandas.core.dtypes.common import ( @@ -180,6 +182,20 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): return index + # ---------------------------------------------------------------- + # Extension Array Interface + + @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) + def _validate_fill_value(self, fill_value): + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, (timedelta, np.timedelta64, Tick)): + fill_value = Timedelta(fill_value).value + else: + raise ValueError("'fill_value' should be a Timedelta. " + "Got '{got}'.".format(got=fill_value)) + return fill_value + # ---------------------------------------------------------------- # Arithmetic Methods diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 6bb4241451b3f..48d87648c09c1 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -45,7 +45,116 @@ def datetime_index(request): return pi -class TestDatetimeArray(object): +@pytest.fixture +def timedelta_index(request): + """ + A fixture to provide TimedeltaIndex objects with different frequencies. + Most TimedeltaArray behavior is already tested in TimedeltaIndex tests, + so here we just test that the TimedeltaArray behavior matches + the TimedeltaIndex behavior. + """ + # TODO: flesh this out + return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) + + +def index_to_array(index): + """ + Helper function to construct a Datetime/Timedelta/Period Array from an + instance of the corresponding Index subclass. + """ + if isinstance(index, pd.DatetimeIndex): + return DatetimeArrayMixin(index) + elif isinstance(index, pd.TimedeltaIndex): + return TimedeltaArrayMixin(index) + elif isinstance(index, pd.PeriodIndex): + return PeriodArrayMixin(index) + else: + raise TypeError(type(index)) + + +class SharedTests(object): + index_cls = None + + def test_take(self): + data = np.arange(100, dtype='i8') + np.random.shuffle(data) + + idx = self.index_cls._simple_new(data, freq='D') + arr = index_to_array(idx) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_take_fill(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D') + arr = index_to_array(idx) + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT) + assert result[0] is pd.NaT + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2.0) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, + fill_value=pd.Timestamp.now().time) + + def test_concat_same_type(self): + data = np.arange(10, dtype='i8') + + idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT) + arr = index_to_array(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + + tm.assert_index_equal(self.index_cls(result), expected) + + +class TestDatetimeArray(SharedTests): + index_cls = pd.DatetimeIndex + + def test_take_fill_valid(self, datetime_index, tz_naive_fixture): + dti = datetime_index.tz_localize(tz_naive_fixture) + arr = index_to_array(dti) + + now = pd.Timestamp.now().tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + with pytest.raises(ValueError): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=pd.Period('2014Q1')) + + tz = None if dti.tz is not None else 'US/Eastern' + now = pd.Timestamp.now().tz_localize(tz) + with pytest.raises(TypeError): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) def test_from_dti(self, tz_naive_fixture): tz = tz_naive_fixture @@ -102,8 +211,40 @@ def test_int_properties(self, datetime_index, propname): tm.assert_numpy_array_equal(result, expected) + def test_concat_same_type_invalid(self, datetime_index): + # different timezones + dti = datetime_index + arr = DatetimeArrayMixin(dti) + + if arr.tz is None: + other = arr.tz_localize('UTC') + else: + other = arr.tz_localize(None) + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + + +class TestTimedeltaArray(SharedTests): + index_cls = pd.TimedeltaIndex + + def test_take_fill_valid(self, timedelta_index): + tdi = timedelta_index + arr = index_to_array(tdi) + + td1 = pd.Timedelta(days=1) + result = arr.take([-1, 1], allow_fill=True, fill_value=td1) + assert result[0] == td1 + + now = pd.Timestamp.now() + with pytest.raises(ValueError): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D')) -class TestTimedeltaArray(object): def test_from_tdi(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) arr = TimedeltaArrayMixin(tdi) @@ -122,8 +263,43 @@ def test_astype_object(self): assert asobj.dtype == 'O' assert list(asobj) == list(tdi) + def test_concat_same_type_invalid(self, timedelta_index): + # different freqs + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) -class TestPeriodArray(object): + other = pd.timedelta_range('1D', periods=5, freq='2D') + # FIXME: TimedeltaArray should inherit freq='2D' without specifying it + other = TimedeltaArrayMixin(other, freq='2D') + assert other.freq != arr.freq + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + + +class TestPeriodArray(SharedTests): + index_cls = pd.PeriodIndex + + def test_take_fill_valid(self, period_index): + pi = period_index + arr = index_to_array(pi) + + now = pd.Timestamp.now().to_period(pi.freq) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + with pytest.raises(ValueError): + # fill_value Period with mis-matched freq invalid + arr.take([0, 1], allow_fill=True, + fill_value=pd.Timestamp.now().to_period(2 * pi.freq)) + + with pytest.raises(ValueError): + # fill_value Timedelta invalid + arr.take([0, 1], allow_fill=True, fill_value=pd.Timedelta(days=1)) + + with pytest.raises(ValueError): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=now.to_timestamp()) def test_from_pi(self, period_index): pi = period_index @@ -176,3 +352,15 @@ def test_int_properties(self, period_index, propname): expected = np.array(getattr(pi, propname)) tm.assert_numpy_array_equal(result, expected) + + def test_concat_same_type_invalid(self, period_index): + # different freqs + pi = period_index + arr = PeriodArrayMixin(pi) + + other = pd.period_range('2016Q3', periods=5, freq='3Q') + other = PeriodArrayMixin(other) + assert other.freq != arr.freq + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other])