Skip to content

Datetimelike __setitem__ #24477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 29, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,52 @@ def __getitem__(self, key):

return self._simple_new(result, **attribs)

def __setitem__(
self,
key, # type: Union[int, Sequence[int], Sequence[bool], slice]
value, # type: Union[NaTType, Scalar, Sequence[Scalar]]
):
# type: (...) -> None
# I'm fudging the types a bit here. The "Scalar" above really depends
# on type(self). For PeriodArray, it's Period (or stuff coercible
# to a period in from_sequence). For DatetimeArray, it's Timestamp...
# I don't know if mypy can do that, possibly with Generics.
# https://mypy.readthedocs.io/en/latest/generics.html

if is_list_like(value):
is_slice = isinstance(key, slice)
if (not is_slice
and len(key) != len(value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

key could be a scalar here right? (in which case u will get an odd exception about len of unsized object)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hrm... yeah

In [3]: pd.period_range('2000', periods=2)._data
Out[3]:
<PeriodArray>
['2000-01-01', '2000-01-02']
Length: 2, dtype: period[D]

In [4]: arr = pd.period_range('2000', periods=2)._data

In [5]: arr[0] = arr[[0, 1]]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-9a9ffe90069d> in <module>
----> 1 arr[0] = arr[[0, 1]]

~/sandbox/pandas/pandas/core/arrays/datetimelike.py in __setitem__(self, key, value)
    494             is_slice = isinstance(key, slice)
    495             if (not is_slice
--> 496                     and len(key) != len(value)
    497                     and not com.is_bool_indexer(key)):
    498                 msg = ("shape mismatch: value array of length '{}' does not "

TypeError: object of type 'int' has no len()

This is a gap in the base extension tests as well. I'll ad one. that should be a... what? ValueError for trying to set list-like into a single element?

Copy link
Contributor Author

@TomAugspurger TomAugspurger Dec 29, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NumPy raises with ValueError: setting an array element with a sequence. which seems like a fine error message to me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep seems good
maybe just let it fall thru this if will work?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seemed a tad easier to explicitly check for a scalar key here.

Actually, grr, this is kinda annoying but NumPy allows setting a sequence of length 1.

In [2]: x = np.array([1, 2])

In [3]: x[0] = x[[0]]

but now I would raise on that.

In [10]: arr = pd.period_range('2000', periods=2)._data

In [11]: arr[0] = arr[[0]]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-666653b9c59a> in <module>
----> 1 arr[0] = arr[[0]]

~/sandbox/pandas/pandas/core/arrays/datetimelike.py in __setitem__(self, key, value)
    495
    496             if lib.is_scalar(key):
--> 497                 raise ValueError("setting an array element with a sequence.")
    498
    499             if (not is_slice

ValueError: setting an array element with a sequence.

Thoughts? I think this should raise, since a length-1 sequence is more like a sequence than a scalar.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah i think we raise on this

this is why setitem in Block is so complicated :)

and not com.is_bool_indexer(key)):
msg = ("shape mismatch: value array of length '{}' does not "
"match indexing result of length '{}'.")
raise ValueError(msg.format(len(key), len(value)))
if not is_slice and len(key) == 0:
return

value = type(self)._from_sequence(value, dtype=self.dtype)
self._check_compatible_with(value)
value = value.asi8
elif isinstance(value, self._scalar_type):
self._check_compatible_with(value)
value = self._unbox_scalar(value)
elif isna(value) or value == iNaT:
value = iNaT
else:
msg = (
"'value' should be a '{scalar}', 'NaT', or array of those. "
"Got '{typ}' instead."
)
raise TypeError(msg.format(scalar=self._scalar_type.__name__,
typ=type(value).__name__))
self._data[key] = value
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be nice to move smoe of this logic a big higher up to EA base if possible and/or make some helper methods to avoid code duplication of EA's implementating setitem, but for another time.

self._maybe_clear_freq()

def _maybe_clear_freq(self):
# inplace operations like __setitem__ may invalidate the freq of
# DatetimeArray and TimedeltaArray
pass

def astype(self, dtype, copy=True):
# Some notes on cases we don't have to handle here in the base class:
# 1. PeriodArray.astype handles period -> period
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,9 @@ def _check_compatible_with(self, other):
raise ValueError("Timezones don't match. '{own} != {other}'"
.format(own=self.tz, other=other.tz))

def _maybe_clear_freq(self):
self._freq = None

# -----------------------------------------------------------------
# Descriptive Properties

Expand Down
42 changes: 0 additions & 42 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,48 +371,6 @@ def _formatter(self, boxed=False):
return str
return "'{}'".format

def __setitem__(
self,
key, # type: Union[int, Sequence[int], Sequence[bool], slice]
value # type: Union[NaTType, Period, Sequence[Period]]
):
# type: (...) -> None
# n.b. the type on `value` is a bit too restrictive.
# we also accept a sequence of stuff coercible to a PeriodArray
# by period_array, which includes things like ndarray[object],
# ndarray[datetime64ns]. I think ndarray[int] / ndarray[str] won't
# work, since the freq can't be inferred.
if is_list_like(value):
is_slice = isinstance(key, slice)
if (not is_slice
and len(key) != len(value)
and not com.is_bool_indexer(key)):
msg = ("shape mismatch: value array of length '{}' does not "
"match indexing result of length '{}'.")
raise ValueError(msg.format(len(key), len(value)))
if not is_slice and len(key) == 0:
return

value = period_array(value)

if self.freqstr != value.freqstr:
_raise_on_incompatible(self, value)

value = value.asi8
elif isinstance(value, Period):

if self.freqstr != value.freqstr:
_raise_on_incompatible(self, value)

value = value.ordinal
elif isna(value):
value = iNaT
else:
msg = ("'value' should be a 'Period', 'NaT', or array of those. "
"Got '{}' instead.".format(type(value).__name__))
raise TypeError(msg)
self._data[key] = value

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
if isna(fill_value):
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,9 @@ def _check_compatible_with(self, other):
# we don't have anything to validate.
pass

def _maybe_clear_freq(self):
self._freq = None

# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,31 @@ def test_searchsorted(self):
result = arr.searchsorted(pd.NaT)
assert result == 0

def test_setitem(self):
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
arr = self.array_cls(data, freq='D')

arr[0] = arr[1]
expected = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
expected[0] = expected[1]

tm.assert_numpy_array_equal(arr.asi8, expected)

arr[:2] = arr[-2:]
expected[:2] = expected[-2:]
tm.assert_numpy_array_equal(arr.asi8, expected)

def test_setitem_raises(self):
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
arr = self.array_cls(data, freq='D')
val = arr[0]

with pytest.raises(IndexError, match="index 12 is out of bounds"):
arr[12] = val

with pytest.raises(TypeError, match="'value' should be a.* 'object'"):
arr[0] = object()


class TestDatetimeArray(SharedTests):
index_cls = pd.DatetimeIndex
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,19 @@ def test_tz_setter_raises(self):
arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
with pytest.raises(AttributeError, match='tz_localize'):
arr.tz = 'UTC'

def test_setitem_different_tz_raises(self):
data = np.array([1, 2, 3], dtype='M8[ns]')
arr = DatetimeArray(data, copy=False,
dtype=DatetimeTZDtype(tz="US/Central"))
with pytest.raises(ValueError, match="None"):
arr[0] = pd.Timestamp('2000')

with pytest.raises(ValueError, match="US/Central"):
arr[0] = pd.Timestamp('2000', tz="US/Eastern")

def test_setitem_clears_freq(self):
a = DatetimeArray(pd.date_range('2000', periods=2, freq='D',
tz='US/Central'))
a[0] = pd.Timestamp("2000", tz="US/Central")
assert a.freq is None
5 changes: 5 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,8 @@ def test_astype_int(self, dtype):

assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)

def test_setitem_clears_freq(self):
a = TimedeltaArray(pd.timedelta_range('1H', periods=2, freq='H'))
a[0] = pd.Timedelta("1H")
assert a.freq is None