diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 93587cd11b597..90641a833f2a7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1566,11 +1566,14 @@ def _ixs(self, i, axis=0, copy=False): # a location index by definition i = _maybe_convert_indices(i, len(self._get_axis(axis))) - return self.reindex(i, takeable=True)._setitem_copy(True) + result = self.reindex(i, takeable=True) + copy=True else: new_values, copy = self._data.fast_2d_xs(i, copy=copy) - return Series(new_values, index=self.columns, - name=self.index[i])._setitem_copy(copy) + result = Series(new_values, index=self.columns, + name=self.index[i]) + result.is_copy=copy + return result # icol else: @@ -1680,7 +1683,7 @@ def _getitem_multilevel(self, key): else: new_values = self.values[:, loc] result = DataFrame(new_values, index=self.index, - columns=result_columns) + columns=result_columns).__finalize__(self) if len(result.columns) == 1: top = result.columns[0] if ((type(top) == str and top == '') or @@ -1689,6 +1692,7 @@ def _getitem_multilevel(self, key): if isinstance(result, Series): result = Series(result, index=self.index, name=key) + result.is_copy=True return result else: return self._get_item_cache(key) @@ -2136,7 +2140,8 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True): new_values, copy = self._data.fast_2d_xs(loc, copy=copy) result = Series(new_values, index=self.columns, - name=self.index[loc])._setitem_copy(copy) + name=self.index[loc]) + result.is_copy=True else: result = self[loc] @@ -2307,7 +2312,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if inplace: frame = self - else: frame = self.copy() @@ -2552,8 +2556,8 @@ def drop_duplicates(self, cols=None, take_last=False, inplace=False): if inplace: inds, = (-duplicated).nonzero() - self._data = self._data.take(inds) - self._clear_item_cache() + new_data = self._data.take(inds) + self._update_inplace(new_data) else: return self[-duplicated] @@ -2717,13 +2721,12 @@ def trans(v): if inplace: if axis == 1: - self._data = self._data.reindex_items( + new_data = self._data.reindex_items( self._data.items[indexer], copy=False) elif axis == 0: - self._data = self._data.take(indexer) - - self._clear_item_cache() + new_data = self._data.take(indexer) + self._update_inplace(new_data) else: return self.take(indexer, axis=axis, convert=False, is_copy=False) @@ -2763,13 +2766,12 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): if inplace: if axis == 1: - self._data = self._data.reindex_items( + new_data = self._data.reindex_items( self._data.items[indexer], copy=False) elif axis == 0: - self._data = self._data.take(indexer) - - self._clear_item_cache() + new_data = self._data.take(indexer) + self._update_inplace(new_data) else: return self.take(indexer, axis=axis, convert=False, is_copy=False) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 624384e484dc0..253136b9a11c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -59,7 +59,7 @@ def _single_replace(self, to_replace, method, inplace, limit): dtype=self.dtype).__finalize__(self) if inplace: - self._data = result._data + self._update_inplace(result._data) return return result @@ -562,9 +562,7 @@ def f(x): result._clear_item_cache() if inplace: - self._data = result._data - self._clear_item_cache() - + self._update_inplace(result._data) else: return result.__finalize__(self) @@ -994,12 +992,22 @@ def _maybe_update_cacher(self, clear=False): if clear, then clear our cache """ cacher = getattr(self, '_cacher', None) if cacher is not None: - try: - cacher[1]()._maybe_cache_changed(cacher[0], self) - except: + ref = cacher[1]() - # our referant is dead + # we are trying to reference a dead referant, hence + # a copy + if ref is None: del self._cacher + self.is_copy = True + self._check_setitem_copy(stacklevel=5, t='referant') + else: + try: + ref._maybe_cache_changed(cacher[0], self) + except: + pass + if ref.is_copy: + self.is_copy = True + self._check_setitem_copy(stacklevel=5, t='referant') if clear: self._clear_item_cache() @@ -1014,12 +1022,7 @@ def _set_item(self, key, value): self._data.set(key, value) self._clear_item_cache() - def _setitem_copy(self, copy): - """ set the _is_copy of the iiem """ - self.is_copy = copy - return self - - def _check_setitem_copy(self, stacklevel=4): + def _check_setitem_copy(self, stacklevel=4, t='setting'): """ validate if we are doing a settitem on a chained copy. If you call this function, be sure to set the stacklevel such that the @@ -1027,9 +1030,13 @@ def _check_setitem_copy(self, stacklevel=4): if self.is_copy: value = config.get_option('mode.chained_assignment') - t = ("A value is trying to be set on a copy of a slice from a " - "DataFrame.\nTry using .loc[row_index,col_indexer] = value " - "instead") + if t == 'referant': + t = ("A value is trying to be set on a copy of a slice from a " + "DataFrame") + else: + t = ("A value is trying to be set on a copy of a slice from a " + "DataFrame.\nTry using .loc[row_index,col_indexer] = value " + "instead") if value == 'raise': raise SettingWithCopyError(t) elif value == 'warn': @@ -1103,7 +1110,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True): # maybe set copy if we didn't actually change the index if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)): - result = result._setitem_copy(is_copy) + result.is_copy=is_copy return result @@ -1218,7 +1225,7 @@ def _update_inplace(self, result): # decision that we may revisit in the future. self._reset_cache() self._clear_item_cache() - self._data = result._data + self._data = getattr(result,'_data',result) self._maybe_update_cacher() def add_prefix(self, prefix): @@ -1910,14 +1917,13 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, continue obj = result[k] obj.fillna(v, inplace=True) - obj._maybe_update_cacher() return result else: new_data = self._data.fillna(value, inplace=inplace, downcast=downcast) if inplace: - self._data = new_data + self._update_inplace(new_data) else: return self._constructor(new_data).__finalize__(self) @@ -2165,7 +2171,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, new_data = new_data.convert(copy=not inplace, convert_numeric=False) if inplace: - self._data = new_data + self._update_inplace(new_data) else: return self._constructor(new_data).__finalize__(self) @@ -2272,10 +2278,10 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, if inplace: if axis == 1: - self._data = new_data + self._update_inplace(new_data) self = self.T else: - self._data = new_data + self._update_inplace(new_data) else: res = self._constructor(new_data).__finalize__(self) if axis == 1: @@ -2856,8 +2862,9 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, if inplace: # we may have different type blocks come out of putmask, so # reconstruct the block manager - self._data = self._data.putmask(cond, other, align=axis is None, - inplace=True) + new_data = self._data.putmask(cond, other, align=axis is None, + inplace=True) + self._update_inplace(new_data) else: new_data = self._data.where(other, cond, align=axis is None, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a4e273c43e483..8444c7a9b2a00 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -209,7 +209,7 @@ def _setitem_with_indexer(self, indexer, value): labels = _safe_append_to_index(index, key) self.obj._data = self.obj.reindex_axis(labels, i)._data self.obj._maybe_update_cacher(clear=True) - self.obj._setitem_copy(False) + self.obj.is_copy=False if isinstance(labels, MultiIndex): self.obj.sortlevel(inplace=True) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ffc30c81ededd..5b501de026c57 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -11607,6 +11607,7 @@ def _check_f(base, f): _check_f(data.copy(), f) # -----Series----- + d = data.copy()['c'] # reset_index f = lambda x: x.reset_index(inplace=True, drop=True) @@ -11614,15 +11615,15 @@ def _check_f(base, f): # fillna f = lambda x: x.fillna(0, inplace=True) - _check_f(data.copy()['c'], f) + _check_f(d.copy(), f) # replace f = lambda x: x.replace(1, 0, inplace=True) - _check_f(data.copy()['c'], f) + _check_f(d.copy(), f) # rename f = lambda x: x.rename({1: 'foo'}, inplace=True) - _check_f(data.copy()['c'], f) + _check_f(d.copy(), f) def test_isin(self): # GH #4211 diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 22c72e1e5d82e..f834094475c1b 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -365,7 +365,7 @@ def f(grp): return None return grp.iloc[0].loc['C'] result = df.groupby('A').apply(f) - e = df.groupby('A').first()['C'] + e = df.groupby('A').first()['C'].copy() e.loc['Pony'] = np.nan e.name = None assert_series_equal(result,e) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index d941224fec52e..59c7bda35c544 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1733,9 +1733,9 @@ def test_cache_updating(self): df.index = index # setting via chained assignment - df.loc[0]['z'].iloc[0] = 1. - result = df.loc[(0,0),'z'] - self.assert_(result == 1) + def f(): + df.loc[0]['z'].iloc[0] = 1. + self.assertRaises(com.SettingWithCopyError, f) # correct setting df.loc[(0,0),'z'] = 2 @@ -1891,6 +1891,20 @@ def random_text(nobs=100): self.assert_(df.is_copy is False) df['a'] += 1 + # inplace ops + # original from: http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), ('ears', 'right')] + events = {('eyes', 'left'): a, ('eyes', 'right'): b, ('ears', 'left'): c, ('ears', 'right'): d} + multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) + zed = DataFrame(events, index=['a', 'b'], columns=multiind) + def f(): + zed['eyes']['right'].fillna(value=555, inplace=True) + self.assertRaises(com.SettingWithCopyError, f) + pd.set_option('chained_assignment','warn') def test_float64index_slicing_bug(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 151f222d7357a..7dd9dbd51d730 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1141,14 +1141,27 @@ def test_is_lexsorted(self): self.assert_(index.lexsort_depth == 0) def test_frame_getitem_view(self): - df = self.frame.T + df = self.frame.T.copy() + + # this works because we are modifying the underlying array + # really a no-no df['foo'].values[:] = 0 self.assert_((df['foo'].values == 0).all()) # but not if it's mixed-type df['foo', 'four'] = 'foo' df = df.sortlevel(0, axis=1) - df['foo']['one'] = 2 + + # this will work, but will raise/warn as its chained assignment + def f(): + df['foo']['one'] = 2 + return df + self.assertRaises(com.SettingWithCopyError, f) + + try: + df = f() + except: + pass self.assert_((df['foo', 'one'] == 0).all()) def test_frame_getitem_not_sorted(self):