From bda34669e8dd77c001921e4ce9c1bb7bc4d8c59c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 16:37:35 -0600 Subject: [PATCH 1/3] Datetimelike __setitem__ Split from #24024. --- pandas/core/arrays/datetimelike.py | 46 ++++++++++++++++++++++++ pandas/core/arrays/datetimes.py | 3 ++ pandas/core/arrays/period.py | 42 ---------------------- pandas/core/arrays/timedeltas.py | 3 ++ pandas/tests/arrays/test_datetimelike.py | 25 +++++++++++++ pandas/tests/arrays/test_datetimes.py | 16 +++++++++ pandas/tests/arrays/test_timedeltas.py | 5 +++ 7 files changed, 98 insertions(+), 42 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index df2b5977bbe7c..dbbbe7c91f3cc 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -478,6 +478,52 @@ def __getitem__(self, key): return self._simple_new(result, **attribs) + def __setitem__( + self, + key, # type: Union[int, Sequence[int], Sequence[bool], slice] + value, # type: Union[NaTType, Scalar, Sequence[Scalar]] + ): + # type: (...) -> None + # I'm fudging the types a bit here. The "Scalar" above really depends + # on type(self). For PeriodArray, it's Period (or stuff coercible + # to a period in from_sequence). For DatetimeArray, it's Timestamp... + # I don't know if mypy can do that, possibly with Generics. + # https://mypy.readthedocs.io/en/latest/generics.html + + if is_list_like(value): + is_slice = isinstance(key, slice) + if (not is_slice + and len(key) != len(value) + and not com.is_bool_indexer(key)): + msg = ("shape mismatch: value array of length '{}' does not " + "match indexing result of length '{}'.") + raise ValueError(msg.format(len(key), len(value))) + if not is_slice and len(key) == 0: + return + + value = type(self)._from_sequence(value, dtype=self.dtype) + self._check_compatible_with(value) + value = value.asi8 + elif isinstance(value, self._scalar_type): + self._check_compatible_with(value) + value = self._unbox_scalar(value) + elif isna(value) or value == iNaT: + value = iNaT + else: + msg = ( + "'value' should be a '{scalar}', 'NaT', or array of those. " + "Got '{typ}' instead." + ) + raise TypeError(msg.format(scalar=self._scalar_type.__name__, + typ=type(value).__name__)) + self._data[key] = value + self._maybe_clear_freq() + + def _maybe_clear_freq(self): + # inplace operations like __setitem__ may invalidate the freq of + # DatetimeArray and TimedeltaArray + pass + def astype(self, dtype, copy=True): # Some notes on cases we don't have to handle here in the base class: # 1. PeriodArray.astype handles period -> period diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 79dcc677973cc..6c73f0ec16c15 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -368,6 +368,9 @@ def _check_compatible_with(self, other): raise ValueError("Timezones don't match. '{own} != {other}'" .format(own=self.tz, other=other.tz)) + def _maybe_clear_freq(self): + self._freq = None + # ----------------------------------------------------------------- # Descriptive Properties diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 2a7422aedb8a3..5ff244b5fd7ae 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -371,48 +371,6 @@ def _formatter(self, boxed=False): return str return "'{}'".format - def __setitem__( - self, - key, # type: Union[int, Sequence[int], Sequence[bool], slice] - value # type: Union[NaTType, Period, Sequence[Period]] - ): - # type: (...) -> None - # n.b. the type on `value` is a bit too restrictive. - # we also accept a sequence of stuff coercible to a PeriodArray - # by period_array, which includes things like ndarray[object], - # ndarray[datetime64ns]. I think ndarray[int] / ndarray[str] won't - # work, since the freq can't be inferred. - if is_list_like(value): - is_slice = isinstance(key, slice) - if (not is_slice - and len(key) != len(value) - and not com.is_bool_indexer(key)): - msg = ("shape mismatch: value array of length '{}' does not " - "match indexing result of length '{}'.") - raise ValueError(msg.format(len(key), len(value))) - if not is_slice and len(key) == 0: - return - - value = period_array(value) - - if self.freqstr != value.freqstr: - _raise_on_incompatible(self, value) - - value = value.asi8 - elif isinstance(value, Period): - - if self.freqstr != value.freqstr: - _raise_on_incompatible(self, value) - - value = value.ordinal - elif isna(value): - value = iNaT - else: - msg = ("'value' should be a 'Period', 'NaT', or array of those. " - "Got '{}' instead.".format(type(value).__name__)) - raise TypeError(msg) - self._data[key] = value - @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) def _validate_fill_value(self, fill_value): if isna(fill_value): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 376c99df080d8..3611e3696e390 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -238,6 +238,9 @@ def _check_compatible_with(self, other): # we don't have anything to validate. pass + def _maybe_clear_freq(self): + self._freq = None + # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 483f25513775e..6bf48aad96f07 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -182,6 +182,31 @@ def test_searchsorted(self): result = arr.searchsorted(pd.NaT) assert result == 0 + def test_setitem(self): + data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq='D') + + arr[0] = arr[1] + expected = np.arange(10, dtype='i8') * 24 * 3600 * 10**9 + expected[0] = expected[1] + + tm.assert_numpy_array_equal(arr.asi8, expected) + + arr[:2] = arr[-2:] + expected[:2] = expected[-2:] + tm.assert_numpy_array_equal(arr.asi8, expected) + + def test_setitem_raises(self): + data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9 + arr = self.array_cls(data, freq='D') + val = arr[0] + + with pytest.raises(IndexError, match="index 12 is out of bounds"): + arr[12] = val + + with pytest.raises(TypeError, match="'value' should be a.* 'object'"): + arr[0] = object() + class TestDatetimeArray(SharedTests): index_cls = pd.DatetimeIndex diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 871bc440825bf..80c87665236d3 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -74,3 +74,19 @@ def test_tz_setter_raises(self): arr = DatetimeArray._from_sequence(['2000'], tz='US/Central') with pytest.raises(AttributeError, match='tz_localize'): arr.tz = 'UTC' + + def test_setitem_different_tz_raises(self): + data = np.array([1, 2, 3], dtype='M8[ns]') + arr = DatetimeArray(data, copy=False, + dtype=DatetimeTZDtype(tz="US/Central")) + with pytest.raises(ValueError, match="None"): + arr[0] = pd.Timestamp('2000') + + with pytest.raises(ValueError, match="US/Central"): + arr[0] = pd.Timestamp('2000', tz="US/Eastern") + + def test_setitem_clears_freq(self): + a = DatetimeArray(pd.date_range('2000', periods=2, freq='D', + tz='US/Central')) + a[0] = pd.Timestamp("2000", tz="US/Central") + assert a.freq is None diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 287079165284b..3264550404642 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -72,3 +72,8 @@ def test_astype_int(self, dtype): assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) + + def test_setitem_clears_freq(self): + a = TimedeltaArray(pd.timedelta_range('1H', periods=2, freq='H')) + a[0] = pd.Timedelta("1H") + assert a.freq is None From 2dc85b7b1fd1483a74998f1f08e5901446c9bd54 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 21:00:07 -0600 Subject: [PATCH 2/3] Updates * Failing test for setting a sequence into a scalar * Fixed implementation --- pandas/core/arrays/datetimelike.py | 4 ++++ pandas/tests/extension/base/setitem.py | 5 +++++ pandas/tests/extension/decimal/array.py | 2 ++ 3 files changed, 11 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index dbbbe7c91f3cc..a6f603d16affe 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -492,6 +492,10 @@ def __setitem__( if is_list_like(value): is_slice = isinstance(key, slice) + + if lib.is_scalar(key): + raise ValueError("setting an array element with a sequence.") + if (not is_slice and len(key) != len(value) and not com.is_bool_indexer(key)): diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 5c767c28643c9..42fda982f7339 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -182,3 +182,8 @@ def test_setitem_slice_array(self, data): arr = data[:5].copy() arr[:5] = data[-5:] self.assert_extension_array_equal(arr, data[-5:]) + + def test_setitem_scalar_key_sequence_raise(self, data): + arr = data[:5].copy() + with pytest.raises(ValueError): + arr[0] = arr[[0, 1]] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 05671bdf13318..7e618dfd2b92e 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -108,6 +108,8 @@ def astype(self, dtype, copy=True): def __setitem__(self, key, value): if pd.api.types.is_list_like(value): + if pd.api.types.is_scalar(key): + raise ValueError("setting an array element with a sequence.") value = [decimal.Decimal(v) for v in value] else: value = decimal.Decimal(value) From e1b7d1da98bbf4bb37a986b17c542e280bbdd677 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 29 Dec 2018 06:17:23 -0600 Subject: [PATCH 3/3] Override Index --- pandas/core/indexes/datetimelike.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 0fe8f73977e6b..4dccf4be4edad 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -38,6 +38,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): # override DatetimeLikeArrayMixin method copy = Index.copy view = Index.view + __setitem__ = Index.__setitem__ # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index