Skip to content

API/ENH: Detect trying to set inplace on copies in a nicer way, related (GH5597) #5679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 11, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 18 additions & 16 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1566,11 +1566,14 @@ def _ixs(self, i, axis=0, copy=False):

# a location index by definition
i = _maybe_convert_indices(i, len(self._get_axis(axis)))
return self.reindex(i, takeable=True)._setitem_copy(True)
result = self.reindex(i, takeable=True)
copy=True
else:
new_values, copy = self._data.fast_2d_xs(i, copy=copy)
return Series(new_values, index=self.columns,
name=self.index[i])._setitem_copy(copy)
result = Series(new_values, index=self.columns,
name=self.index[i])
result.is_copy=copy
return result

# icol
else:
Expand Down Expand Up @@ -1680,7 +1683,7 @@ def _getitem_multilevel(self, key):
else:
new_values = self.values[:, loc]
result = DataFrame(new_values, index=self.index,
columns=result_columns)
columns=result_columns).__finalize__(self)
if len(result.columns) == 1:
top = result.columns[0]
if ((type(top) == str and top == '') or
Expand All @@ -1689,6 +1692,7 @@ def _getitem_multilevel(self, key):
if isinstance(result, Series):
result = Series(result, index=self.index, name=key)

result.is_copy=True
return result
else:
return self._get_item_cache(key)
Expand Down Expand Up @@ -2136,7 +2140,8 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True):

new_values, copy = self._data.fast_2d_xs(loc, copy=copy)
result = Series(new_values, index=self.columns,
name=self.index[loc])._setitem_copy(copy)
name=self.index[loc])
result.is_copy=True

else:
result = self[loc]
Expand Down Expand Up @@ -2307,7 +2312,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False,

if inplace:
frame = self

else:
frame = self.copy()

Expand Down Expand Up @@ -2552,8 +2556,8 @@ def drop_duplicates(self, cols=None, take_last=False, inplace=False):

if inplace:
inds, = (-duplicated).nonzero()
self._data = self._data.take(inds)
self._clear_item_cache()
new_data = self._data.take(inds)
self._update_inplace(new_data)
else:
return self[-duplicated]

Expand Down Expand Up @@ -2717,13 +2721,12 @@ def trans(v):

if inplace:
if axis == 1:
self._data = self._data.reindex_items(
new_data = self._data.reindex_items(
self._data.items[indexer],
copy=False)
elif axis == 0:
self._data = self._data.take(indexer)

self._clear_item_cache()
new_data = self._data.take(indexer)
self._update_inplace(new_data)
else:
return self.take(indexer, axis=axis, convert=False, is_copy=False)

Expand Down Expand Up @@ -2763,13 +2766,12 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):

if inplace:
if axis == 1:
self._data = self._data.reindex_items(
new_data = self._data.reindex_items(
self._data.items[indexer],
copy=False)
elif axis == 0:
self._data = self._data.take(indexer)

self._clear_item_cache()
new_data = self._data.take(indexer)
self._update_inplace(new_data)
else:
return self.take(indexer, axis=axis, convert=False, is_copy=False)

Expand Down
59 changes: 33 additions & 26 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def _single_replace(self, to_replace, method, inplace, limit):
dtype=self.dtype).__finalize__(self)

if inplace:
self._data = result._data
self._update_inplace(result._data)
return

return result
Expand Down Expand Up @@ -562,9 +562,7 @@ def f(x):
result._clear_item_cache()

if inplace:
self._data = result._data
self._clear_item_cache()

self._update_inplace(result._data)
else:
return result.__finalize__(self)

Expand Down Expand Up @@ -994,12 +992,22 @@ def _maybe_update_cacher(self, clear=False):
if clear, then clear our cache """
cacher = getattr(self, '_cacher', None)
if cacher is not None:
try:
cacher[1]()._maybe_cache_changed(cacher[0], self)
except:
ref = cacher[1]()

# our referant is dead
# we are trying to reference a dead referant, hence
# a copy
if ref is None:
del self._cacher
self.is_copy = True
self._check_setitem_copy(stacklevel=5, t='referant')
else:
try:
ref._maybe_cache_changed(cacher[0], self)
except:
pass
if ref.is_copy:
self.is_copy = True
self._check_setitem_copy(stacklevel=5, t='referant')

if clear:
self._clear_item_cache()
Expand All @@ -1014,22 +1022,21 @@ def _set_item(self, key, value):
self._data.set(key, value)
self._clear_item_cache()

def _setitem_copy(self, copy):
""" set the _is_copy of the iiem """
self.is_copy = copy
return self

def _check_setitem_copy(self, stacklevel=4):
def _check_setitem_copy(self, stacklevel=4, t='setting'):
""" validate if we are doing a settitem on a chained copy.

If you call this function, be sure to set the stacklevel such that the
user will see the error *at the level of setting*"""
if self.is_copy:
value = config.get_option('mode.chained_assignment')

t = ("A value is trying to be set on a copy of a slice from a "
"DataFrame.\nTry using .loc[row_index,col_indexer] = value "
"instead")
if t == 'referant':
t = ("A value is trying to be set on a copy of a slice from a "
"DataFrame")
else:
t = ("A value is trying to be set on a copy of a slice from a "
"DataFrame.\nTry using .loc[row_index,col_indexer] = value "
"instead")
if value == 'raise':
raise SettingWithCopyError(t)
elif value == 'warn':
Expand Down Expand Up @@ -1103,7 +1110,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True):

# maybe set copy if we didn't actually change the index
if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)):
result = result._setitem_copy(is_copy)
result.is_copy=is_copy

return result

Expand Down Expand Up @@ -1218,7 +1225,7 @@ def _update_inplace(self, result):
# decision that we may revisit in the future.
self._reset_cache()
self._clear_item_cache()
self._data = result._data
self._data = getattr(result,'_data',result)
self._maybe_update_cacher()

def add_prefix(self, prefix):
Expand Down Expand Up @@ -1910,14 +1917,13 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
continue
obj = result[k]
obj.fillna(v, inplace=True)
obj._maybe_update_cacher()
return result
else:
new_data = self._data.fillna(value, inplace=inplace,
downcast=downcast)

if inplace:
self._data = new_data
self._update_inplace(new_data)
else:
return self._constructor(new_data).__finalize__(self)

Expand Down Expand Up @@ -2165,7 +2171,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
new_data = new_data.convert(copy=not inplace, convert_numeric=False)

if inplace:
self._data = new_data
self._update_inplace(new_data)
else:
return self._constructor(new_data).__finalize__(self)

Expand Down Expand Up @@ -2272,10 +2278,10 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,

if inplace:
if axis == 1:
self._data = new_data
self._update_inplace(new_data)
self = self.T
else:
self._data = new_data
self._update_inplace(new_data)
else:
res = self._constructor(new_data).__finalize__(self)
if axis == 1:
Expand Down Expand Up @@ -2856,8 +2862,9 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
if inplace:
# we may have different type blocks come out of putmask, so
# reconstruct the block manager
self._data = self._data.putmask(cond, other, align=axis is None,
inplace=True)
new_data = self._data.putmask(cond, other, align=axis is None,
inplace=True)
self._update_inplace(new_data)

else:
new_data = self._data.where(other, cond, align=axis is None,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def _setitem_with_indexer(self, indexer, value):
labels = _safe_append_to_index(index, key)
self.obj._data = self.obj.reindex_axis(labels, i)._data
self.obj._maybe_update_cacher(clear=True)
self.obj._setitem_copy(False)
self.obj.is_copy=False

if isinstance(labels, MultiIndex):
self.obj.sortlevel(inplace=True)
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11607,22 +11607,23 @@ def _check_f(base, f):
_check_f(data.copy(), f)

# -----Series-----
d = data.copy()['c']

# reset_index
f = lambda x: x.reset_index(inplace=True, drop=True)
_check_f(data.set_index('a')['c'], f)

# fillna
f = lambda x: x.fillna(0, inplace=True)
_check_f(data.copy()['c'], f)
_check_f(d.copy(), f)

# replace
f = lambda x: x.replace(1, 0, inplace=True)
_check_f(data.copy()['c'], f)
_check_f(d.copy(), f)

# rename
f = lambda x: x.rename({1: 'foo'}, inplace=True)
_check_f(data.copy()['c'], f)
_check_f(d.copy(), f)

def test_isin(self):
# GH #4211
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def f(grp):
return None
return grp.iloc[0].loc['C']
result = df.groupby('A').apply(f)
e = df.groupby('A').first()['C']
e = df.groupby('A').first()['C'].copy()
e.loc['Pony'] = np.nan
e.name = None
assert_series_equal(result,e)
Expand Down
20 changes: 17 additions & 3 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1733,9 +1733,9 @@ def test_cache_updating(self):
df.index = index

# setting via chained assignment
df.loc[0]['z'].iloc[0] = 1.
result = df.loc[(0,0),'z']
self.assert_(result == 1)
def f():
df.loc[0]['z'].iloc[0] = 1.
self.assertRaises(com.SettingWithCopyError, f)

# correct setting
df.loc[(0,0),'z'] = 2
Expand Down Expand Up @@ -1891,6 +1891,20 @@ def random_text(nobs=100):
self.assert_(df.is_copy is False)
df['a'] += 1

# inplace ops
# original from: http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
a = [12, 23]
b = [123, None]
c = [1234, 2345]
d = [12345, 23456]
tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), ('ears', 'right')]
events = {('eyes', 'left'): a, ('eyes', 'right'): b, ('ears', 'left'): c, ('ears', 'right'): d}
multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
zed = DataFrame(events, index=['a', 'b'], columns=multiind)
def f():
zed['eyes']['right'].fillna(value=555, inplace=True)
self.assertRaises(com.SettingWithCopyError, f)

pd.set_option('chained_assignment','warn')

def test_float64index_slicing_bug(self):
Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1141,14 +1141,27 @@ def test_is_lexsorted(self):
self.assert_(index.lexsort_depth == 0)

def test_frame_getitem_view(self):
df = self.frame.T
df = self.frame.T.copy()

# this works because we are modifying the underlying array
# really a no-no
df['foo'].values[:] = 0
self.assert_((df['foo'].values == 0).all())

# but not if it's mixed-type
df['foo', 'four'] = 'foo'
df = df.sortlevel(0, axis=1)
df['foo']['one'] = 2

# this will work, but will raise/warn as its chained assignment
def f():
df['foo']['one'] = 2
return df
self.assertRaises(com.SettingWithCopyError, f)

try:
df = f()
except:
pass
self.assert_((df['foo', 'one'] == 0).all())

def test_frame_getitem_not_sorted(self):
Expand Down