-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: fix mutation of DTI backing Series/DataFrame #24096
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
36a251e
03f4eb4
616aaf7
841c4a5
22b32cf
a9e7e40
a66ee4e
4851ffa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,7 +29,8 @@ | |
is_integer, is_integer_dtype, is_iterator, is_list_like, is_object_dtype, | ||
is_scalar, is_string_like, is_timedelta64_dtype, pandas_dtype) | ||
from pandas.core.dtypes.generic import ( | ||
ABCDataFrame, ABCIndexClass, ABCSeries, ABCSparseArray, ABCSparseSeries) | ||
ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCSeries, ABCSparseArray, | ||
ABCSparseSeries) | ||
from pandas.core.dtypes.missing import ( | ||
isna, na_value_for_dtype, notna, remove_na_arraylike) | ||
|
||
|
@@ -189,6 +190,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None, | |
else: | ||
# need to copy to avoid aliasing issues | ||
data = data._values.copy() | ||
if isinstance(data, ABCDatetimeIndex): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. really?? this is a specific problem to Datetime w/tz, this is too heavy handed |
||
# GH#24096 need copy to be deep for datetime64tz case | ||
data = data._values.copy(deep=True) | ||
copy = False | ||
|
||
elif isinstance(data, np.ndarray): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,51 @@ | |
|
||
|
||
class TestDataFrameBlockInternals(): | ||
def test_setitem_invalidates_datetime_index_freq(self): | ||
# GH#24096 altering a datetime64tz column inplace invalidates the | ||
# `freq` attribute on the underlying DatetimeIndex | ||
|
||
dti = date_range('20130101', periods=3, tz='US/Eastern') | ||
ts = dti[1] | ||
|
||
df = DataFrame({'B': dti}) | ||
assert df['B']._values.freq == 'D' | ||
|
||
df.iloc[1, 0] = pd.NaT | ||
assert df['B']._values.freq is None | ||
|
||
# check that the DatetimeIndex was not altered in place | ||
assert dti.freq == 'D' | ||
assert dti[1] == ts | ||
|
||
dti = date_range('20130101', periods=3, tz='US/Eastern') | ||
ts = dti[1] | ||
ser = Series(dti) | ||
assert ser._values is not dti | ||
assert ser._values._data.base is not dti._data.base | ||
assert dti.freq == 'D' | ||
ser.iloc[1] = pd.NaT | ||
assert ser._values.freq is None | ||
|
||
# check that the DatetimeIndex was not altered in place | ||
assert ser._values is not dti | ||
assert ser._values._data.base is not dti._data.base | ||
assert dti[1] == ts | ||
assert dti.freq == 'D' | ||
|
||
def test_dt64tz_setitem_does_not_mutate_dti(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is not a series.test_block_internals; not sure if there is somewhere else this might belong There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls create one There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure |
||
# GH#21907, GH#24096 | ||
dti = pd.date_range('2016-01-01', periods=10, tz='US/Pacific') | ||
ts = dti[0] | ||
ser = pd.Series(dti) | ||
assert ser._values is not dti | ||
assert ser._values._data.base is not dti._data.base | ||
assert ser._data.blocks[0].values is not dti | ||
assert ser._data.blocks[0].values._data.base is not dti._data.base | ||
|
||
ser[::3] = pd.NaT | ||
assert ser[0] is pd.NaT | ||
assert dti[0] == ts | ||
|
||
def test_cast_internals(self, float_frame): | ||
casted = DataFrame(float_frame._data, dtype=int) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
really? we need to deep copy everything?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can be more specific and just make this
ABCDatetimeIndex
, but I don't see any solution that doesn't involve making a copy here.The first thing I tried was to make a copy in
Block.setitem
(circa line 923 in internals.blocks) if we had aDatetimeIndex
. But that breaks a test because it means when we callser.__setitem__
(orser.loc.__setitem__
, or...), we fail to change other Series that are views onser
.AFAICT the only way to do do this is to avoid ever having a
DatetimeIndex
and aSeries
/column share data. This may be simpler to resolve after Blocks are backed byDatetimeArray
instead ofDatetimeIndex
(I'm not 100% sure on this), but ATM this is a blocker to #24074, which is itself a blocker toDatetimeArray
. We could avoid having this be a blocker for #24074 if you were OK with the pickle workaround currently there.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no, the pickle workaround is much worse. for now let's just do this for DTI w/tz only. otherwise this may be a lot of copying. Pls add a TODO around this though.