Skip to content

BUG: fix mutation of DTI backing Series/DataFrame #24096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 5, 2018
9 changes: 8 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,11 @@ def setitem(self, indexer, value):
b = self.astype(dtype)
return b.setitem(indexer, value)

if (self._holder is not None and
issubclass(self._holder, ABCIndexClass)):
# avoid altering Index objects in place
values = values.copy()

# value must be storeable at this moment
arr_value = np.array(value)

Expand Down Expand Up @@ -2923,7 +2928,9 @@ def _try_coerce_result(self, result):
# allow passing of > 1dim if its trivial
if result.ndim > 1:
result = result.reshape(np.prod(result.shape))
result = self.values._shallow_copy(result)

# new values invalidate the frequency
result = self.values._shallow_copy(result, freq=None)

return result

Expand Down
31 changes: 31 additions & 0 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,37 @@


class TestDataFrameBlockInternals():
def test_setitem_invalidates_datetime_index_freq(self):
    """Check that item assignment on tz-aware datetime data resets ``freq``.

    Overwriting an element of a datetime64tz column in place must clear
    the ``freq`` attribute of the values backing the column, and must not
    modify the DatetimeIndex object that originally backed a Series.
    """
    # --- DataFrame column ---
    eastern_days = date_range('20130101', periods=3, tz='US/Eastern')
    frame = DataFrame({'B': eastern_days})
    assert frame['B']._values.freq == 'D'

    frame.iloc[1, 0] = pd.NaT
    assert frame['B']._values.freq is None

    # --- Series ---
    eastern_days = date_range('20130101', periods=3, tz='US/Eastern')
    ser = Series(eastern_days)
    # Remember the element and the backing values before the assignment.
    stamp_before = ser[1]
    backing_before = ser._values
    assert backing_before.freq == 'D'

    ser.iloc[1] = pd.NaT
    assert ser._values.freq is None

    # The Series must now hold different backing values; the object that
    # backed it before the assignment keeps its original element.
    assert ser._values is not backing_before
    assert backing_before[1] == stamp_before

def test_dt64tz_setitem_does_not_mutate_dti(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no `series.test_block_internals` module; I'm not sure whether there is somewhere else this test might belong.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please create one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

# GH#21907
dti = pd.date_range('2016-01-01', periods=10, tz='US/Pacific')
ts = dti[0]
ser = pd.Series(dti)
ser[::3] = pd.NaT
assert ser[0] is pd.NaT
assert dti[0] == ts

def test_cast_internals(self, float_frame):
casted = DataFrame(float_frame._data, dtype=int)
Expand Down