Skip to content

Commit 507ffb5

Browse files
committed
API/ENH: Detect trying to set inplace on copies in a nicer way, related (GH5597)
1 parent ed5726e commit 507ffb5

File tree

6 files changed

+74
-39
lines changed

6 files changed

+74
-39
lines changed

pandas/core/frame.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -1677,6 +1677,7 @@ def _getitem_multilevel(self, key):
16771677
if self._is_mixed_type:
16781678
result = self.reindex(columns=new_columns)
16791679
result.columns = result_columns
1680+
result.is_copy=True
16801681
else:
16811682
new_values = self.values[:, loc]
16821683
result = DataFrame(new_values, index=self.index,
@@ -2307,7 +2308,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
23072308

23082309
if inplace:
23092310
frame = self
2310-
23112311
else:
23122312
frame = self.copy()
23132313

@@ -2552,8 +2552,8 @@ def drop_duplicates(self, cols=None, take_last=False, inplace=False):
25522552

25532553
if inplace:
25542554
inds, = (-duplicated).nonzero()
2555-
self._data = self._data.take(inds)
2556-
self._clear_item_cache()
2555+
new_data = self._data.take(inds)
2556+
self._update_inplace(new_data)
25572557
else:
25582558
return self[-duplicated]
25592559

@@ -2717,13 +2717,12 @@ def trans(v):
27172717

27182718
if inplace:
27192719
if axis == 1:
2720-
self._data = self._data.reindex_items(
2720+
new_data = self._data.reindex_items(
27212721
self._data.items[indexer],
27222722
copy=False)
27232723
elif axis == 0:
2724-
self._data = self._data.take(indexer)
2725-
2726-
self._clear_item_cache()
2724+
new_data = self._data.take(indexer)
2725+
self._update_inplace(new_data)
27272726
else:
27282727
return self.take(indexer, axis=axis, convert=False, is_copy=False)
27292728

@@ -2763,13 +2762,12 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
27632762

27642763
if inplace:
27652764
if axis == 1:
2766-
self._data = self._data.reindex_items(
2765+
new_data = self._data.reindex_items(
27672766
self._data.items[indexer],
27682767
copy=False)
27692768
elif axis == 0:
2770-
self._data = self._data.take(indexer)
2771-
2772-
self._clear_item_cache()
2769+
new_data = self._data.take(indexer)
2770+
self._update_inplace(new_data)
27732771
else:
27742772
return self.take(indexer, axis=axis, convert=False, is_copy=False)
27752773

pandas/core/generic.py

+32-20
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def _single_replace(self, to_replace, method, inplace, limit):
5959
dtype=self.dtype).__finalize__(self)
6060

6161
if inplace:
62-
self._data = result._data
62+
self._update_inplace(result._data)
6363
return
6464

6565
return result
@@ -562,9 +562,7 @@ def f(x):
562562
result._clear_item_cache()
563563

564564
if inplace:
565-
self._data = result._data
566-
self._clear_item_cache()
567-
565+
self._update_inplace(result._data)
568566
else:
569567
return result.__finalize__(self)
570568

@@ -994,12 +992,22 @@ def _maybe_update_cacher(self, clear=False):
994992
if clear, then clear our cache """
995993
cacher = getattr(self, '_cacher', None)
996994
if cacher is not None:
997-
try:
998-
cacher[1]()._maybe_cache_changed(cacher[0], self)
999-
except:
995+
ref = cacher[1]()
1000996

1001-
# our referant is dead
997+
# we are trying to reference a dead referant, hence
998+
# a copy
999+
if ref is None:
10021000
del self._cacher
1001+
self.is_copy = True
1002+
self._check_setitem_copy(stacklevel=5, t='referant')
1003+
else:
1004+
try:
1005+
ref._maybe_cache_changed(cacher[0], self)
1006+
except:
1007+
pass
1008+
if ref.is_copy:
1009+
self.is_copy = True
1010+
self._check_setitem_copy(stacklevel=5, t='referant')
10031011

10041012
if clear:
10051013
self._clear_item_cache()
@@ -1019,17 +1027,21 @@ def _setitem_copy(self, copy):
10191027
self.is_copy = copy
10201028
return self
10211029

1022-
def _check_setitem_copy(self, stacklevel=4):
1030+
def _check_setitem_copy(self, stacklevel=4, t='setting'):
10231031
""" validate if we are doing a settitem on a chained copy.
10241032
10251033
If you call this function, be sure to set the stacklevel such that the
10261034
user will see the error *at the level of setting*"""
10271035
if self.is_copy:
10281036
value = config.get_option('mode.chained_assignment')
10291037

1030-
t = ("A value is trying to be set on a copy of a slice from a "
1031-
"DataFrame.\nTry using .loc[row_index,col_indexer] = value "
1032-
"instead")
1038+
if t == 'referant':
1039+
t = ("A value is trying to be set on a copy of a slice from a "
1040+
"DataFrame")
1041+
else:
1042+
t = ("A value is trying to be set on a copy of a slice from a "
1043+
"DataFrame.\nTry using .loc[row_index,col_indexer] = value "
1044+
"instead")
10331045
if value == 'raise':
10341046
raise SettingWithCopyError(t)
10351047
elif value == 'warn':
@@ -1218,7 +1230,7 @@ def _update_inplace(self, result):
12181230
# decision that we may revisit in the future.
12191231
self._reset_cache()
12201232
self._clear_item_cache()
1221-
self._data = result._data
1233+
self._data = getattr(result,'_data',result)
12221234
self._maybe_update_cacher()
12231235

12241236
def add_prefix(self, prefix):
@@ -1910,14 +1922,13 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
19101922
continue
19111923
obj = result[k]
19121924
obj.fillna(v, inplace=True)
1913-
obj._maybe_update_cacher()
19141925
return result
19151926
else:
19161927
new_data = self._data.fillna(value, inplace=inplace,
19171928
downcast=downcast)
19181929

19191930
if inplace:
1920-
self._data = new_data
1931+
self._update_inplace(new_data)
19211932
else:
19221933
return self._constructor(new_data).__finalize__(self)
19231934

@@ -2165,7 +2176,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
21652176
new_data = new_data.convert(copy=not inplace, convert_numeric=False)
21662177

21672178
if inplace:
2168-
self._data = new_data
2179+
self._update_inplace(new_data)
21692180
else:
21702181
return self._constructor(new_data).__finalize__(self)
21712182

@@ -2272,10 +2283,10 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
22722283

22732284
if inplace:
22742285
if axis == 1:
2275-
self._data = new_data
2286+
self._update_inplace(new_data)
22762287
self = self.T
22772288
else:
2278-
self._data = new_data
2289+
self._update_inplace(new_data)
22792290
else:
22802291
res = self._constructor(new_data).__finalize__(self)
22812292
if axis == 1:
@@ -2856,8 +2867,9 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
28562867
if inplace:
28572868
# we may have different type blocks come out of putmask, so
28582869
# reconstruct the block manager
2859-
self._data = self._data.putmask(cond, other, align=axis is None,
2860-
inplace=True)
2870+
new_data = self._data.putmask(cond, other, align=axis is None,
2871+
inplace=True)
2872+
self._update_inplace(new_data)
28612873

28622874
else:
28632875
new_data = self._data.where(other, cond, align=axis is None,

pandas/tests/test_frame.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -11607,22 +11607,23 @@ def _check_f(base, f):
1160711607
_check_f(data.copy(), f)
1160811608

1160911609
# -----Series-----
11610+
d = data.copy()['c']
1161011611

1161111612
# reset_index
1161211613
f = lambda x: x.reset_index(inplace=True, drop=True)
1161311614
_check_f(data.set_index('a')['c'], f)
1161411615

1161511616
# fillna
1161611617
f = lambda x: x.fillna(0, inplace=True)
11617-
_check_f(data.copy()['c'], f)
11618+
_check_f(d.copy(), f)
1161811619

1161911620
# replace
1162011621
f = lambda x: x.replace(1, 0, inplace=True)
11621-
_check_f(data.copy()['c'], f)
11622+
_check_f(d.copy(), f)
1162211623

1162311624
# rename
1162411625
f = lambda x: x.rename({1: 'foo'}, inplace=True)
11625-
_check_f(data.copy()['c'], f)
11626+
_check_f(d.copy(), f)
1162611627

1162711628
def test_isin(self):
1162811629
# GH #4211

pandas/tests/test_groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ def f(grp):
365365
return None
366366
return grp.iloc[0].loc['C']
367367
result = df.groupby('A').apply(f)
368-
e = df.groupby('A').first()['C']
368+
e = df.groupby('A').first()['C'].copy()
369369
e.loc['Pony'] = np.nan
370370
e.name = None
371371
assert_series_equal(result,e)

pandas/tests/test_indexing.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -1733,9 +1733,9 @@ def test_cache_updating(self):
17331733
df.index = index
17341734

17351735
# setting via chained assignment
1736-
df.loc[0]['z'].iloc[0] = 1.
1737-
result = df.loc[(0,0),'z']
1738-
self.assert_(result == 1)
1736+
def f():
1737+
df.loc[0]['z'].iloc[0] = 1.
1738+
self.assertRaises(com.SettingWithCopyError, f)
17391739

17401740
# correct setting
17411741
df.loc[(0,0),'z'] = 2
@@ -1891,6 +1891,20 @@ def random_text(nobs=100):
18911891
self.assert_(df.is_copy is False)
18921892
df['a'] += 1
18931893

1894+
# inplace ops
1895+
# original from: http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
1896+
a = [12, 23]
1897+
b = [123, None]
1898+
c = [1234, 2345]
1899+
d = [12345, 23456]
1900+
tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), ('ears', 'right')]
1901+
events = {('eyes', 'left'): a, ('eyes', 'right'): b, ('ears', 'left'): c, ('ears', 'right'): d}
1902+
multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
1903+
zed = DataFrame(events, index=['a', 'b'], columns=multiind)
1904+
def f():
1905+
zed['eyes']['right'].fillna(value=555, inplace=True)
1906+
self.assertRaises(com.SettingWithCopyError, f)
1907+
18941908
pd.set_option('chained_assignment','warn')
18951909

18961910
def test_float64index_slicing_bug(self):

pandas/tests/test_multilevel.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -1148,7 +1148,17 @@ def test_frame_getitem_view(self):
11481148
# but not if it's mixed-type
11491149
df['foo', 'four'] = 'foo'
11501150
df = df.sortlevel(0, axis=1)
1151-
df['foo']['one'] = 2
1151+
1152+
# this will work, but will raise/warn as its chained assignment
1153+
def f():
1154+
df['foo']['one'] = 2
1155+
return df
1156+
self.assertRaises(com.SettingWithCopyError, f)
1157+
1158+
try:
1159+
df = f()
1160+
except:
1161+
pass
11521162
self.assert_((df['foo', 'one'] == 0).all())
11531163

11541164
def test_frame_getitem_not_sorted(self):

0 commit comments

Comments
 (0)