Skip to content

BUG: cache coherence issue with chain indexing and setitem (GH7084) #7087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 9, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ Bug Fixes
were being passed to plotting method
- :func:`read_fwf` treats ``None`` in ``colspec`` like regular python slices. It now reads from the beginning
or until the end of the line when ``colspec`` contains a ``None`` (previously raised a ``TypeError``)
- Bug in cache coherence with chained indexing and slicing; add ``_is_view`` property to ``NDFrame`` to correctly predict
views; mark ``is_copy`` on ``xs`` only if it's an actual copy (and not a view) (:issue:`7084`)

pandas 0.13.1
-------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1632,7 +1632,7 @@ def _ixs(self, i, axis=0):
name=label, fastpath=True)

# this is a cached value, mark it so
result._set_as_cached(i, self)
result._set_as_cached(label, self)

return result

Expand Down
9 changes: 8 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,11 @@ def _is_cached(self):
cacher = getattr(self, '_cacher', None)
return cacher is not None

@property
def _is_view(self):
""" boolean : True if my underlying data reports itself as a view (delegates to ``self._data.is_view``) """
return self._data.is_view

def _maybe_update_cacher(self, clear=False):
""" see if we need to update our parent cacher
if clear, then clear our cache """
Expand Down Expand Up @@ -1372,7 +1377,9 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
result = self[loc]
result.index = new_index

result._set_is_copy(self)
# this could be a view
# but only in a single-dtyped view slicable case
result._set_is_copy(self, copy=not result._is_view)
return result

_xs = xs
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2289,6 +2289,13 @@ def is_datelike_mixed_type(self):
self._consolidate_inplace()
return any([block.is_datelike for block in self.blocks])

@property
def is_view(self):
""" return a boolean : True only if we hold exactly one block and that block's values have a non-None ``base``; False otherwise """
if len(self.blocks) == 1:
# presumably ``values`` is a numpy array, whose ``base`` is non-None
# when it borrows memory from another array -- TODO confirm for all block types
return self.blocks[0].values.base is not None
# any other number of blocks is always reported as "not a view"
return False

def get_bool_data(self, copy=False):
"""
Parameters
Expand Down
1 change: 1 addition & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ def _set_values(self, key, value):
if isinstance(key, Series):
key = key.values
self._data = self._data.setitem(indexer=key, value=value)
self._maybe_update_cacher()

# help out SparseSeries
_get_val_at = ndarray.__getitem__
Expand Down
29 changes: 26 additions & 3 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2666,9 +2666,10 @@ def test_cache_updating(self):
df.index = index

# setting via chained assignment
def f():
df.loc[0]['z'].iloc[0] = 1.
self.assertRaises(com.SettingWithCopyError, f)
# but actually works, since everything is a view
df.loc[0]['z'].iloc[0] = 1.
result = df.loc[(0,0),'z']
self.assertEqual(result, 1)

# correct setting
df.loc[(0,0),'z'] = 2
Expand Down Expand Up @@ -2710,6 +2711,28 @@ def test_setitem_cache_updating(self):
self.assertEqual(df.ix[0,'c'], 0.0)
self.assertEqual(df.ix[7,'c'], 1.0)

# GH 7084
# not updating cache on series setting with slices
out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014'))
df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})

#loop through df to update out
six = Timestamp('5/7/2014')
eix = Timestamp('5/9/2014')
for ix, row in df.iterrows():
out[row['C']][six:eix] = out[row['C']][six:eix] + row['D']

expected = DataFrame({'A': [600, 600, 600]}, index=date_range('5/7/2014', '5/9/2014'))
assert_frame_equal(out, expected)
assert_series_equal(out['A'], expected['A'])

out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014'))
for ix, row in df.iterrows():
out.loc[six:eix,row['C']] += row['D']

assert_frame_equal(out, expected)
assert_series_equal(out['A'], expected['A'])

def test_setitem_chained_setfault(self):

# GH6026
Expand Down