From da4c89740e9635a8b9be3ff91b75b8bda3b1113a Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 9 May 2014 09:22:44 -0400 Subject: [PATCH] BUG: cache coherence issue with chain indexing and setitem (GH7084) --- doc/source/release.rst | 2 ++ pandas/core/frame.py | 2 +- pandas/core/generic.py | 9 ++++++++- pandas/core/internals.py | 7 +++++++ pandas/core/series.py | 1 + pandas/tests/test_indexing.py | 29 ++++++++++++++++++++++++++--- 6 files changed, 45 insertions(+), 5 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 8422efd4247d1..5eaa0aa469a3c 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -482,6 +482,8 @@ Bug Fixes were being passed to plotting method - :func:`read_fwf` treats ``None`` in ``colspec`` like regular python slices. It now reads from the beginning or until the end of the line when ``colspec`` contains a ``None`` (previously raised a ``TypeError``) +- Bug in cache coherence with chained indexing and slicing; add ``_is_view`` property to ``NDFrame`` to correctly predict + views; mark ``is_copy`` on ``xs` only if its an actual copy (and not a view) (:issue:`7084`) pandas 0.13.1 ------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ba237e5cd04c4..4345154437bf5 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1632,7 +1632,7 @@ def _ixs(self, i, axis=0): name=label, fastpath=True) # this is a cached value, mark it so - result._set_as_cached(i, self) + result._set_as_cached(label, self) return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 01af7534d458d..b2e7120a21062 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1091,6 +1091,11 @@ def _is_cached(self): cacher = getattr(self, '_cacher', None) return cacher is not None + @property + def _is_view(self): + """ boolean : return if I am a view of another array """ + return self._data.is_view + def _maybe_update_cacher(self, clear=False): """ see if we need to update our parent cacher if clear, then clear our cache """ @@ -1372,7 +1377,9 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): result = self[loc] result.index = new_index - result._set_is_copy(self) + # this could be a view + # but only in a single-dtyped view slicable case + result._set_is_copy(self, copy=not result._is_view) return result _xs = xs diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 48d047baaa6c0..de93330f10271 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2289,6 +2289,13 @@ def is_datelike_mixed_type(self): self._consolidate_inplace() return any([block.is_datelike for block in self.blocks]) + @property + def is_view(self): + """ return a boolean if we are a single block and are a view """ + if len(self.blocks) == 1: + return self.blocks[0].values.base is not None + return False + def get_bool_data(self, copy=False): """ Parameters diff --git a/pandas/core/series.py b/pandas/core/series.py index 74f038b2bad23..fc9b9ad936351 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -693,6 +693,7 @@ def _set_values(self, key, value): if isinstance(key, Series): key = key.values self._data = self._data.setitem(indexer=key, value=value) + self._maybe_update_cacher() # help out SparseSeries _get_val_at = ndarray.__getitem__ diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index d42babc7cddbe..e36fdffc8cc31 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -2666,9 +2666,10 @@ def test_cache_updating(self): df.index = index # setting via chained assignment - def f(): - df.loc[0]['z'].iloc[0] = 1. - self.assertRaises(com.SettingWithCopyError, f) + # but actually works, since everything is a view + df.loc[0]['z'].iloc[0] = 1. + result = df.loc[(0,0),'z'] + self.assertEqual(result, 1) # correct setting df.loc[(0,0),'z'] = 2 @@ -2710,6 +2711,28 @@ def test_setitem_cache_updating(self): self.assertEqual(df.ix[0,'c'], 0.0) self.assertEqual(df.ix[7,'c'], 1.0) + # GH 7084 + # not updating cache on series setting with slices + out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014')) + df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]}) + + #loop through df to update out + six = Timestamp('5/7/2014') + eix = Timestamp('5/9/2014') + for ix, row in df.iterrows(): + out[row['C']][six:eix] = out[row['C']][six:eix] + row['D'] + + expected = DataFrame({'A': [600, 600, 600]}, index=date_range('5/7/2014', '5/9/2014')) + assert_frame_equal(out, expected) + assert_series_equal(out['A'], expected['A']) + + out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014')) + for ix, row in df.iterrows(): + out.loc[six:eix,row['C']] += row['D'] + + assert_frame_equal(out, expected) + assert_series_equal(out['A'], expected['A']) + def test_setitem_chained_setfault(self): # GH6026