Skip to content

BUG: fix mutation of DTI backing Series/DataFrame #24096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 5, 2018
4 changes: 3 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2923,7 +2923,9 @@ def _try_coerce_result(self, result):
# allow passing of > 1dim if it's trivial
if result.ndim > 1:
result = result.reshape(np.prod(result.shape))
result = self.values._shallow_copy(result)

# GH#24096 new values invalidate the existing frequency
result = self.values._shallow_copy(result, freq=None)

return result

Expand Down
8 changes: 8 additions & 0 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ def init_dict(data, index, columns, dtype=None):
arrays.loc[missing] = [v] * missing.sum()

else:

for key in data:
if (isinstance(data[key], ABCDatetimeIndex) and
data[key].tz is not None):
# GH#24096 need copy to be deep for datetime64tz case
# TODO: See if we can avoid these copies
data[key] = data[key].copy(deep=True)

keys = com.dict_keys_to_ordered_list(data)
columns = data_names = Index(keys)
arrays = [data[k] for k in keys]
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCSeries, ABCSparseArray, ABCSparseSeries)
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries)
from pandas.core.dtypes.missing import (
isna, na_value_for_dtype, notna, remove_na_arraylike)

Expand Down Expand Up @@ -182,6 +182,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
else:
# need to copy to avoid aliasing issues
data = data._values.copy()
if (isinstance(data, ABCDatetimeIndex) and
data.tz is not None):
# GH#24096 need copy to be deep for datetime64tz case
# TODO: See if we can avoid these copies
data = data._values.copy(deep=True)
copy = False

elif isinstance(data, np.ndarray):
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,22 @@


class TestDataFrameBlockInternals():
def test_setitem_invalidates_datetime_index_freq(self):
    # GH#24096 altering a datetime64tz column inplace invalidates the
    # `freq` attribute on the underlying DatetimeIndex.  The test also
    # verifies that the index used to build the frame is left untouched.

    dti = date_range('20130101', periods=3, tz='US/Eastern')
    # keep the original middle element so a later mutation of `dti`
    # would be detected
    ts = dti[1]

    df = DataFrame({'B': dti})
    # before any assignment the column's values still report a daily freq
    assert df['B']._values.freq == 'D'

    # overwrite row 1 of the only column with NaT, then confirm the
    # column's values no longer carry a freq
    df.iloc[1, 0] = pd.NaT
    assert df['B']._values.freq is None

    # check that the DatetimeIndex was not altered in place
    assert dti.freq == 'D'
    assert dti[1] == ts

def test_cast_internals(self, float_frame):
casted = DataFrame(float_frame._data, dtype=int)
Expand Down
42 changes: 42 additions & 0 deletions pandas/tests/series/test_block_internals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-

import pandas as pd

# Segregated collection of methods that require the BlockManager internal data
# structure


class TestSeriesBlockInternals(object):
    # Regression tests checking that a Series built from a tz-aware
    # DatetimeIndex does not alias that index, so in-place assignment on
    # the Series leaves the index unchanged.

    def test_setitem_invalidates_datetime_index_freq(self):
        # GH#24096 altering a datetime64tz Series inplace invalidates the
        # `freq` attribute on the underlying DatetimeIndex

        dti = pd.date_range('20130101', periods=3, tz='US/Eastern')
        # keep the original middle element to compare against afterwards
        ts = dti[1]
        ser = pd.Series(dti)
        # the Series must not hold the index object itself ...
        assert ser._values is not dti
        # ... nor report the same `.base` object as the index's data
        # NOTE(review): presumably this guards against the two sharing one
        # underlying buffer -- confirm what `_data.base` refers to here
        assert ser._values._data.base is not dti._data.base
        assert dti.freq == 'D'
        # assign NaT to position 1 and confirm the Series' values drop freq
        ser.iloc[1] = pd.NaT
        assert ser._values.freq is None

        # check that the DatetimeIndex was not altered in place
        # (the identity checks are repeated after the assignment)
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data.base
        assert dti[1] == ts
        assert dti.freq == 'D'

    def test_dt64tz_setitem_does_not_mutate_dti(self):
        # GH#21907, GH#24096
        # Slice-assignment on the Series must not write through to the
        # DatetimeIndex it was constructed from.
        dti = pd.date_range('2016-01-01', periods=10, tz='US/Pacific')
        # keep the original first element to compare against afterwards
        ts = dti[0]
        ser = pd.Series(dti)
        # neither the Series' values nor the values stored in its first
        # block may be the index object or report the same `.base`
        assert ser._values is not dti
        assert ser._values._data.base is not dti._data.base
        assert ser._data.blocks[0].values is not dti
        assert ser._data.blocks[0].values._data.base is not dti._data.base

        # set every third element (starting at position 0) to NaT
        ser[::3] = pd.NaT
        # the Series sees the new value, the index keeps its original one
        assert ser[0] is pd.NaT
        assert dti[0] == ts