Skip to content

Commit ceec8bf

Browse files
committed
Merge pull request #5679 from jreback/inplace
API/ENH: Detect trying to set inplace on copies in a nicer way, related (GH5597)
2 parents ed5726e + 222bc9d commit ceec8bf

File tree

7 files changed

+89
-52
lines changed

7 files changed

+89
-52
lines changed

pandas/core/frame.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -1566,11 +1566,14 @@ def _ixs(self, i, axis=0, copy=False):
15661566

15671567
# a location index by definition
15681568
i = _maybe_convert_indices(i, len(self._get_axis(axis)))
1569-
return self.reindex(i, takeable=True)._setitem_copy(True)
1569+
result = self.reindex(i, takeable=True)
1570+
copy=True
15701571
else:
15711572
new_values, copy = self._data.fast_2d_xs(i, copy=copy)
1572-
return Series(new_values, index=self.columns,
1573-
name=self.index[i])._setitem_copy(copy)
1573+
result = Series(new_values, index=self.columns,
1574+
name=self.index[i])
1575+
result.is_copy=copy
1576+
return result
15741577

15751578
# icol
15761579
else:
@@ -1680,7 +1683,7 @@ def _getitem_multilevel(self, key):
16801683
else:
16811684
new_values = self.values[:, loc]
16821685
result = DataFrame(new_values, index=self.index,
1683-
columns=result_columns)
1686+
columns=result_columns).__finalize__(self)
16841687
if len(result.columns) == 1:
16851688
top = result.columns[0]
16861689
if ((type(top) == str and top == '') or
@@ -1689,6 +1692,7 @@ def _getitem_multilevel(self, key):
16891692
if isinstance(result, Series):
16901693
result = Series(result, index=self.index, name=key)
16911694

1695+
result.is_copy=True
16921696
return result
16931697
else:
16941698
return self._get_item_cache(key)
@@ -2136,7 +2140,8 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True):
21362140

21372141
new_values, copy = self._data.fast_2d_xs(loc, copy=copy)
21382142
result = Series(new_values, index=self.columns,
2139-
name=self.index[loc])._setitem_copy(copy)
2143+
name=self.index[loc])
2144+
result.is_copy=True
21402145

21412146
else:
21422147
result = self[loc]
@@ -2307,7 +2312,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
23072312

23082313
if inplace:
23092314
frame = self
2310-
23112315
else:
23122316
frame = self.copy()
23132317

@@ -2552,8 +2556,8 @@ def drop_duplicates(self, cols=None, take_last=False, inplace=False):
25522556

25532557
if inplace:
25542558
inds, = (-duplicated).nonzero()
2555-
self._data = self._data.take(inds)
2556-
self._clear_item_cache()
2559+
new_data = self._data.take(inds)
2560+
self._update_inplace(new_data)
25572561
else:
25582562
return self[-duplicated]
25592563

@@ -2717,13 +2721,12 @@ def trans(v):
27172721

27182722
if inplace:
27192723
if axis == 1:
2720-
self._data = self._data.reindex_items(
2724+
new_data = self._data.reindex_items(
27212725
self._data.items[indexer],
27222726
copy=False)
27232727
elif axis == 0:
2724-
self._data = self._data.take(indexer)
2725-
2726-
self._clear_item_cache()
2728+
new_data = self._data.take(indexer)
2729+
self._update_inplace(new_data)
27272730
else:
27282731
return self.take(indexer, axis=axis, convert=False, is_copy=False)
27292732

@@ -2763,13 +2766,12 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
27632766

27642767
if inplace:
27652768
if axis == 1:
2766-
self._data = self._data.reindex_items(
2769+
new_data = self._data.reindex_items(
27672770
self._data.items[indexer],
27682771
copy=False)
27692772
elif axis == 0:
2770-
self._data = self._data.take(indexer)
2771-
2772-
self._clear_item_cache()
2773+
new_data = self._data.take(indexer)
2774+
self._update_inplace(new_data)
27732775
else:
27742776
return self.take(indexer, axis=axis, convert=False, is_copy=False)
27752777

pandas/core/generic.py

+33-26
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def _single_replace(self, to_replace, method, inplace, limit):
5959
dtype=self.dtype).__finalize__(self)
6060

6161
if inplace:
62-
self._data = result._data
62+
self._update_inplace(result._data)
6363
return
6464

6565
return result
@@ -562,9 +562,7 @@ def f(x):
562562
result._clear_item_cache()
563563

564564
if inplace:
565-
self._data = result._data
566-
self._clear_item_cache()
567-
565+
self._update_inplace(result._data)
568566
else:
569567
return result.__finalize__(self)
570568

@@ -994,12 +992,22 @@ def _maybe_update_cacher(self, clear=False):
994992
if clear, then clear our cache """
995993
cacher = getattr(self, '_cacher', None)
996994
if cacher is not None:
997-
try:
998-
cacher[1]()._maybe_cache_changed(cacher[0], self)
999-
except:
995+
ref = cacher[1]()
1000996

1001-
# our referant is dead
997+
# we are trying to reference a dead referant, hence
998+
# a copy
999+
if ref is None:
10021000
del self._cacher
1001+
self.is_copy = True
1002+
self._check_setitem_copy(stacklevel=5, t='referant')
1003+
else:
1004+
try:
1005+
ref._maybe_cache_changed(cacher[0], self)
1006+
except:
1007+
pass
1008+
if ref.is_copy:
1009+
self.is_copy = True
1010+
self._check_setitem_copy(stacklevel=5, t='referant')
10031011

10041012
if clear:
10051013
self._clear_item_cache()
@@ -1014,22 +1022,21 @@ def _set_item(self, key, value):
10141022
self._data.set(key, value)
10151023
self._clear_item_cache()
10161024

1017-
def _setitem_copy(self, copy):
1018-
""" set the _is_copy of the iiem """
1019-
self.is_copy = copy
1020-
return self
1021-
1022-
def _check_setitem_copy(self, stacklevel=4):
1025+
def _check_setitem_copy(self, stacklevel=4, t='setting'):
10231026
""" validate if we are doing a settitem on a chained copy.
10241027
10251028
If you call this function, be sure to set the stacklevel such that the
10261029
user will see the error *at the level of setting*"""
10271030
if self.is_copy:
10281031
value = config.get_option('mode.chained_assignment')
10291032

1030-
t = ("A value is trying to be set on a copy of a slice from a "
1031-
"DataFrame.\nTry using .loc[row_index,col_indexer] = value "
1032-
"instead")
1033+
if t == 'referant':
1034+
t = ("A value is trying to be set on a copy of a slice from a "
1035+
"DataFrame")
1036+
else:
1037+
t = ("A value is trying to be set on a copy of a slice from a "
1038+
"DataFrame.\nTry using .loc[row_index,col_indexer] = value "
1039+
"instead")
10331040
if value == 'raise':
10341041
raise SettingWithCopyError(t)
10351042
elif value == 'warn':
@@ -1103,7 +1110,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True):
11031110

11041111
# maybe set copy if we didn't actually change the index
11051112
if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)):
1106-
result = result._setitem_copy(is_copy)
1113+
result.is_copy=is_copy
11071114

11081115
return result
11091116

@@ -1218,7 +1225,7 @@ def _update_inplace(self, result):
12181225
# decision that we may revisit in the future.
12191226
self._reset_cache()
12201227
self._clear_item_cache()
1221-
self._data = result._data
1228+
self._data = getattr(result,'_data',result)
12221229
self._maybe_update_cacher()
12231230

12241231
def add_prefix(self, prefix):
@@ -1910,14 +1917,13 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
19101917
continue
19111918
obj = result[k]
19121919
obj.fillna(v, inplace=True)
1913-
obj._maybe_update_cacher()
19141920
return result
19151921
else:
19161922
new_data = self._data.fillna(value, inplace=inplace,
19171923
downcast=downcast)
19181924

19191925
if inplace:
1920-
self._data = new_data
1926+
self._update_inplace(new_data)
19211927
else:
19221928
return self._constructor(new_data).__finalize__(self)
19231929

@@ -2165,7 +2171,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
21652171
new_data = new_data.convert(copy=not inplace, convert_numeric=False)
21662172

21672173
if inplace:
2168-
self._data = new_data
2174+
self._update_inplace(new_data)
21692175
else:
21702176
return self._constructor(new_data).__finalize__(self)
21712177

@@ -2272,10 +2278,10 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
22722278

22732279
if inplace:
22742280
if axis == 1:
2275-
self._data = new_data
2281+
self._update_inplace(new_data)
22762282
self = self.T
22772283
else:
2278-
self._data = new_data
2284+
self._update_inplace(new_data)
22792285
else:
22802286
res = self._constructor(new_data).__finalize__(self)
22812287
if axis == 1:
@@ -2856,8 +2862,9 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
28562862
if inplace:
28572863
# we may have different type blocks come out of putmask, so
28582864
# reconstruct the block manager
2859-
self._data = self._data.putmask(cond, other, align=axis is None,
2860-
inplace=True)
2865+
new_data = self._data.putmask(cond, other, align=axis is None,
2866+
inplace=True)
2867+
self._update_inplace(new_data)
28612868

28622869
else:
28632870
new_data = self._data.where(other, cond, align=axis is None,

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def _setitem_with_indexer(self, indexer, value):
209209
labels = _safe_append_to_index(index, key)
210210
self.obj._data = self.obj.reindex_axis(labels, i)._data
211211
self.obj._maybe_update_cacher(clear=True)
212-
self.obj._setitem_copy(False)
212+
self.obj.is_copy=False
213213

214214
if isinstance(labels, MultiIndex):
215215
self.obj.sortlevel(inplace=True)

pandas/tests/test_frame.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -11607,22 +11607,23 @@ def _check_f(base, f):
1160711607
_check_f(data.copy(), f)
1160811608

1160911609
# -----Series-----
11610+
d = data.copy()['c']
1161011611

1161111612
# reset_index
1161211613
f = lambda x: x.reset_index(inplace=True, drop=True)
1161311614
_check_f(data.set_index('a')['c'], f)
1161411615

1161511616
# fillna
1161611617
f = lambda x: x.fillna(0, inplace=True)
11617-
_check_f(data.copy()['c'], f)
11618+
_check_f(d.copy(), f)
1161811619

1161911620
# replace
1162011621
f = lambda x: x.replace(1, 0, inplace=True)
11621-
_check_f(data.copy()['c'], f)
11622+
_check_f(d.copy(), f)
1162211623

1162311624
# rename
1162411625
f = lambda x: x.rename({1: 'foo'}, inplace=True)
11625-
_check_f(data.copy()['c'], f)
11626+
_check_f(d.copy(), f)
1162611627

1162711628
def test_isin(self):
1162811629
# GH #4211

pandas/tests/test_groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ def f(grp):
365365
return None
366366
return grp.iloc[0].loc['C']
367367
result = df.groupby('A').apply(f)
368-
e = df.groupby('A').first()['C']
368+
e = df.groupby('A').first()['C'].copy()
369369
e.loc['Pony'] = np.nan
370370
e.name = None
371371
assert_series_equal(result,e)

pandas/tests/test_indexing.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -1733,9 +1733,9 @@ def test_cache_updating(self):
17331733
df.index = index
17341734

17351735
# setting via chained assignment
1736-
df.loc[0]['z'].iloc[0] = 1.
1737-
result = df.loc[(0,0),'z']
1738-
self.assert_(result == 1)
1736+
def f():
1737+
df.loc[0]['z'].iloc[0] = 1.
1738+
self.assertRaises(com.SettingWithCopyError, f)
17391739

17401740
# correct setting
17411741
df.loc[(0,0),'z'] = 2
@@ -1891,6 +1891,20 @@ def random_text(nobs=100):
18911891
self.assert_(df.is_copy is False)
18921892
df['a'] += 1
18931893

1894+
# inplace ops
1895+
# original from: http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
1896+
a = [12, 23]
1897+
b = [123, None]
1898+
c = [1234, 2345]
1899+
d = [12345, 23456]
1900+
tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), ('ears', 'right')]
1901+
events = {('eyes', 'left'): a, ('eyes', 'right'): b, ('ears', 'left'): c, ('ears', 'right'): d}
1902+
multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
1903+
zed = DataFrame(events, index=['a', 'b'], columns=multiind)
1904+
def f():
1905+
zed['eyes']['right'].fillna(value=555, inplace=True)
1906+
self.assertRaises(com.SettingWithCopyError, f)
1907+
18941908
pd.set_option('chained_assignment','warn')
18951909

18961910
def test_float64index_slicing_bug(self):

pandas/tests/test_multilevel.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -1141,14 +1141,27 @@ def test_is_lexsorted(self):
11411141
self.assert_(index.lexsort_depth == 0)
11421142

11431143
def test_frame_getitem_view(self):
1144-
df = self.frame.T
1144+
df = self.frame.T.copy()
1145+
1146+
# this works because we are modifying the underlying array
1147+
# really a no-no
11451148
df['foo'].values[:] = 0
11461149
self.assert_((df['foo'].values == 0).all())
11471150

11481151
# but not if it's mixed-type
11491152
df['foo', 'four'] = 'foo'
11501153
df = df.sortlevel(0, axis=1)
1151-
df['foo']['one'] = 2
1154+
1155+
# this will work, but will raise/warn as its chained assignment
1156+
def f():
1157+
df['foo']['one'] = 2
1158+
return df
1159+
self.assertRaises(com.SettingWithCopyError, f)
1160+
1161+
try:
1162+
df = f()
1163+
except:
1164+
pass
11521165
self.assert_((df['foo', 'one'] == 0).all())
11531166

11541167
def test_frame_getitem_not_sorted(self):

0 commit comments

Comments
 (0)