Skip to content

Commit ecaa39c

Browse files
committed
Merge pull request #7087 from jreback/series_cache
BUG: cache coherence issue with chain indexing and setitem (GH7084)
2 parents 16a8902 + da4c897 commit ecaa39c

File tree

6 files changed

+45
-5
lines changed

6 files changed

+45
-5
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,8 @@ Bug Fixes
482482
were being passed to plotting method
483483
- :func:`read_fwf` treats ``None`` in ``colspec`` like regular python slices. It now reads from the beginning
484484
or until the end of the line when ``colspec`` contains a ``None`` (previously raised a ``TypeError``)
485+
- Bug in cache coherence with chained indexing and slicing; add ``_is_view`` property to ``NDFrame`` to correctly predict
486+
views; mark ``is_copy`` on ``xs`` only if it is an actual copy (and not a view) (:issue:`7084`)
485487
486488
pandas 0.13.1
487489
-------------

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1632,7 +1632,7 @@ def _ixs(self, i, axis=0):
16321632
name=label, fastpath=True)
16331633

16341634
# this is a cached value, mark it so
1635-
result._set_as_cached(i, self)
1635+
result._set_as_cached(label, self)
16361636

16371637
return result
16381638

pandas/core/generic.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,11 @@ def _is_cached(self):
10911091
cacher = getattr(self, '_cacher', None)
10921092
return cacher is not None
10931093

1094+
@property
1095+
def _is_view(self):
1096+
""" boolean : return if I am a view of another array """
1097+
return self._data.is_view
1098+
10941099
def _maybe_update_cacher(self, clear=False):
10951100
""" see if we need to update our parent cacher
10961101
if clear, then clear our cache """
@@ -1372,7 +1377,9 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
13721377
result = self[loc]
13731378
result.index = new_index
13741379

1375-
result._set_is_copy(self)
1380+
# this could be a view
1381+
# but only in a single-dtyped, view-sliceable case
1382+
result._set_is_copy(self, copy=not result._is_view)
13761383
return result
13771384

13781385
_xs = xs

pandas/core/internals.py

+7
Original file line numberDiff line numberDiff line change
@@ -2289,6 +2289,13 @@ def is_datelike_mixed_type(self):
22892289
self._consolidate_inplace()
22902290
return any([block.is_datelike for block in self.blocks])
22912291

2292+
@property
2293+
def is_view(self):
2294+
""" return True if we are a single block and that block's values are a view; otherwise False """
2295+
if len(self.blocks) == 1:
2296+
return self.blocks[0].values.base is not None
2297+
return False
2298+
22922299
def get_bool_data(self, copy=False):
22932300
"""
22942301
Parameters

pandas/core/series.py

+1
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,7 @@ def _set_values(self, key, value):
693693
if isinstance(key, Series):
694694
key = key.values
695695
self._data = self._data.setitem(indexer=key, value=value)
696+
self._maybe_update_cacher()
696697

697698
# help out SparseSeries
698699
_get_val_at = ndarray.__getitem__

pandas/tests/test_indexing.py

+26-3
Original file line numberDiff line numberDiff line change
@@ -2666,9 +2666,10 @@ def test_cache_updating(self):
26662666
df.index = index
26672667

26682668
# setting via chained assignment
2669-
def f():
2670-
df.loc[0]['z'].iloc[0] = 1.
2671-
self.assertRaises(com.SettingWithCopyError, f)
2669+
# but actually works, since everything is a view
2670+
df.loc[0]['z'].iloc[0] = 1.
2671+
result = df.loc[(0,0),'z']
2672+
self.assertEqual(result, 1)
26722673

26732674
# correct setting
26742675
df.loc[(0,0),'z'] = 2
@@ -2710,6 +2711,28 @@ def test_setitem_cache_updating(self):
27102711
self.assertEqual(df.ix[0,'c'], 0.0)
27112712
self.assertEqual(df.ix[7,'c'], 1.0)
27122713

2714+
# GH 7084
2715+
# not updating cache on series setting with slices
2716+
out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014'))
2717+
df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})
2718+
2719+
#loop through df to update out
2720+
six = Timestamp('5/7/2014')
2721+
eix = Timestamp('5/9/2014')
2722+
for ix, row in df.iterrows():
2723+
out[row['C']][six:eix] = out[row['C']][six:eix] + row['D']
2724+
2725+
expected = DataFrame({'A': [600, 600, 600]}, index=date_range('5/7/2014', '5/9/2014'))
2726+
assert_frame_equal(out, expected)
2727+
assert_series_equal(out['A'], expected['A'])
2728+
2729+
out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014'))
2730+
for ix, row in df.iterrows():
2731+
out.loc[six:eix,row['C']] += row['D']
2732+
2733+
assert_frame_equal(out, expected)
2734+
assert_series_equal(out['A'], expected['A'])
2735+
27132736
def test_setitem_chained_setfault(self):
27142737

27152738
# GH6026

0 commit comments

Comments
 (0)