diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 198e832ca4603..9c2d4cd5729d2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2923,7 +2923,9 @@ def _try_coerce_result(self, result): # allow passing of > 1dim if its trivial if result.ndim > 1: result = result.reshape(np.prod(result.shape)) - result = self.values._shallow_copy(result) + + # GH#24096 new values invalidates a frequency + result = self.values._shallow_copy(result, freq=None) return result diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 5859dc9e858b7..910690a986c1c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -196,6 +196,14 @@ def init_dict(data, index, columns, dtype=None): arrays.loc[missing] = [v] * missing.sum() else: + + for key in data: + if (isinstance(data[key], ABCDatetimeIndex) and + data[key].tz is not None): + # GH#24096 need copy to be deep for datetime64tz case + # TODO: See if we can avoid these copies + data[key] = data[key].copy(deep=True) + keys = com.dict_keys_to_ordered_list(data) columns = data_names = Index(keys) arrays = [data[k] for k in keys] diff --git a/pandas/core/series.py b/pandas/core/series.py index 6b4c9927ef0f1..6f5ab43ff6756 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,7 +21,7 @@ is_extension_array_dtype, is_extension_type, is_hashable, is_integer, is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCSeries, ABCSparseArray, ABCSparseSeries) + ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries) from pandas.core.dtypes.missing import ( isna, na_value_for_dtype, notna, remove_na_arraylike) @@ -182,6 +182,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: # need to copy to avoid aliasing issues data = data._values.copy() + if (isinstance(data, ABCDatetimeIndex) and + data.tz is not None): + # GH#24096 need copy to be deep for datetime64tz case + # TODO: See if we can avoid these copies + data = data._values.copy(deep=True) copy = False elif isinstance(data, np.ndarray): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 224e56777f6b4..647077a0428f3 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -28,6 +28,22 @@ class TestDataFrameBlockInternals(): + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz column inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = date_range('20130101', periods=3, tz='US/Eastern') + ts = dti[1] + + df = DataFrame({'B': dti}) + assert df['B']._values.freq == 'D' + + df.iloc[1, 0] = pd.NaT + assert df['B']._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert dti.freq == 'D' + assert dti[1] == ts def test_cast_internals(self, float_frame): casted = DataFrame(float_frame._data, dtype=int) diff --git a/pandas/tests/series/test_block_internals.py b/pandas/tests/series/test_block_internals.py new file mode 100644 index 0000000000000..ccfb169cc2f8d --- /dev/null +++ b/pandas/tests/series/test_block_internals.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +import pandas as pd + +# Segregated collection of methods that require the BlockManager internal data +# structure + + +class TestSeriesBlockInternals(object): + + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz Series inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = pd.date_range('20130101', periods=3, tz='US/Eastern') + ts = dti[1] + ser = pd.Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data.base + assert dti.freq == 'D' + ser.iloc[1] = pd.NaT + assert ser._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert ser._values is not dti + assert ser._values._data.base is not dti._data.base + assert dti[1] == ts + assert dti.freq == 'D' + + def test_dt64tz_setitem_does_not_mutate_dti(self): + # GH#21907, GH#24096 + dti = pd.date_range('2016-01-01', periods=10, tz='US/Pacific') + ts = dti[0] + ser = pd.Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data.base + assert ser._data.blocks[0].values is not dti + assert ser._data.blocks[0].values._data.base is not dti._data.base + + ser[::3] = pd.NaT + assert ser[0] is pd.NaT + assert dti[0] == ts