Skip to content

Commit fcfaa7d

Browse files
committed
Merge pull request #6042 from jreback/setting_with_copy
BUG: less false positives with SettingWithCopy (GH6025)
2 parents 6044e92 + 9b90af7 commit fcfaa7d

File tree

8 files changed

+73
-29
lines changed

8 files changed

+73
-29
lines changed

doc/source/release.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ API Changes
5959
- ``Series.sort`` will raise a ``ValueError`` (rather than a ``TypeError``) on sorting an
6060
object that is a view of another (:issue:`5856`, :issue:`5853`)
6161
- Raise/Warn ``SettingWithCopyError`` (according to the option ``chained_assignment`` in more cases,
62-
when detecting chained assignment, related (:issue:`5938`)
62+
when detecting chained assignment, related (:issue:`5938`, :issue:`6025`)
6363
- DataFrame.head(0) returns self instead of empty frame (:issue:`5846`)
6464
- ``autocorrelation_plot`` now accepts ``**kwargs``. (:issue:`5623`)
6565
- ``convert_objects`` now accepts a ``convert_timedeltas='coerce'`` argument to allow forced dtype conversion of

pandas/core/frame.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1582,7 +1582,7 @@ def _ixs(self, i, axis=0, copy=False):
15821582
new_values, copy = self._data.fast_2d_xs(i, copy=copy)
15831583
result = Series(new_values, index=self.columns,
15841584
name=self.index[i], dtype=new_values.dtype)
1585-
result.is_copy=copy
1585+
result._set_is_copy(self, copy=copy)
15861586
return result
15871587

15881588
# icol
@@ -1707,7 +1707,7 @@ def _getitem_multilevel(self, key):
17071707
if isinstance(result, Series):
17081708
result = Series(result, index=self.index, name=key)
17091709

1710-
result.is_copy=True
1710+
result._set_is_copy(self)
17111711
return result
17121712
else:
17131713
return self._get_item_cache(key)
@@ -1878,6 +1878,7 @@ def __setitem__(self, key, value):
18781878
self._set_item(key, value)
18791879

18801880
def _setitem_slice(self, key, value):
1881+
self._check_setitem_copy()
18811882
self.ix._setitem_with_indexer(key, value)
18821883

18831884
def _setitem_array(self, key, value):
@@ -1912,6 +1913,7 @@ def _setitem_frame(self, key, value):
19121913
raise TypeError(
19131914
'Cannot do boolean setting on mixed-type frame')
19141915

1916+
self._check_setitem_copy()
19151917
self.where(-key, value, inplace=True)
19161918

19171919
def _ensure_valid_index(self, value):

pandas/core/generic.py

+32-8
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import warnings
33
import operator
44
import weakref
5+
import gc
56
import numpy as np
67
import pandas.lib as lib
78

@@ -97,7 +98,7 @@ def __init__(self, data, axes=None, copy=False, dtype=None,
9798
for i, ax in enumerate(axes):
9899
data = data.reindex_axis(ax, axis=i)
99100

100-
object.__setattr__(self, 'is_copy', False)
101+
object.__setattr__(self, 'is_copy', None)
101102
object.__setattr__(self, '_data', data)
102103
object.__setattr__(self, '_item_cache', {})
103104

@@ -994,6 +995,9 @@ def _get_item_cache(self, item):
994995
res = self._box_item_values(item, values)
995996
cache[item] = res
996997
res._set_as_cached(item, self)
998+
999+
# for a chain
1000+
res.is_copy = self.is_copy
9971001
return res
9981002

9991003
def _set_as_cached(self, item, cacher):
@@ -1031,16 +1035,14 @@ def _maybe_update_cacher(self, clear=False):
10311035
# a copy
10321036
if ref is None:
10331037
del self._cacher
1034-
self.is_copy = True
1035-
self._check_setitem_copy(stacklevel=5, t='referant')
10361038
else:
10371039
try:
10381040
ref._maybe_cache_changed(cacher[0], self)
10391041
except:
10401042
pass
1041-
if ref.is_copy:
1042-
self.is_copy = True
1043-
self._check_setitem_copy(stacklevel=5, t='referant')
1043+
1044+
# check if we are a copy
1045+
self._check_setitem_copy(stacklevel=5, t='referant')
10441046

10451047
if clear:
10461048
self._clear_item_cache()
@@ -1055,13 +1057,35 @@ def _set_item(self, key, value):
10551057
self._data.set(key, value)
10561058
self._clear_item_cache()
10571059

1060+
def _set_is_copy(self, ref=None, copy=True):
1061+
if not copy:
1062+
self.is_copy = None
1063+
else:
1064+
if ref is not None:
1065+
self.is_copy = weakref.ref(ref)
1066+
else:
1067+
self.is_copy = None
1068+
10581069
def _check_setitem_copy(self, stacklevel=4, t='setting'):
10591070
""" validate if we are doing a settitem on a chained copy.
10601071
10611072
If you call this function, be sure to set the stacklevel such that the
10621073
user will see the error *at the level of setting*"""
10631074
if self.is_copy:
1075+
10641076
value = config.get_option('mode.chained_assignment')
1077+
if value is None:
1078+
return
1079+
1080+
# see if the copy is not actually refererd; if so, then disolve
1081+
# the copy weakref
1082+
try:
1083+
gc.collect(2)
1084+
if not gc.get_referents(self.is_copy()):
1085+
self.is_copy = None
1086+
return
1087+
except:
1088+
pass
10651089

10661090
if t == 'referant':
10671091
t = ("A value is trying to be set on a copy of a slice from a "
@@ -1143,7 +1167,7 @@ def take(self, indices, axis=0, convert=True, is_copy=True):
11431167

11441168
# maybe set copy if we didn't actually change the index
11451169
if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)):
1146-
result.is_copy=is_copy
1170+
result._set_is_copy(self)
11471171

11481172
return result
11491173

@@ -1276,12 +1300,12 @@ def xs(self, key, axis=0, level=None, copy=True, drop_level=True):
12761300
new_values, copy = self._data.fast_2d_xs(loc, copy=copy)
12771301
result = Series(new_values, index=self.columns,
12781302
name=self.index[loc])
1279-
result.is_copy=True
12801303

12811304
else:
12821305
result = self[loc]
12831306
result.index = new_index
12841307

1308+
result._set_is_copy(self)
12851309
return result
12861310

12871311
_xs = xs

pandas/core/indexing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def _setitem_with_indexer(self, indexer, value):
211211
labels = _safe_append_to_index(index, key)
212212
self.obj._data = self.obj.reindex_axis(labels, i)._data
213213
self.obj._maybe_update_cacher(clear=True)
214-
self.obj.is_copy=False
214+
self.obj.is_copy=None
215215

216216
if isinstance(labels, MultiIndex):
217217
self.obj.sortlevel(inplace=True)
@@ -418,6 +418,7 @@ def can_do_equal_len():
418418
if isinstance(value, ABCPanel):
419419
value = self._align_panel(indexer, value)
420420

421+
# actually do the set
421422
self.obj._data = self.obj._data.setitem(indexer, value)
422423
self.obj._maybe_update_cacher(clear=True)
423424

pandas/io/json.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def __init__(self, obj, orient, date_format, double_precision,
6060
self.date_unit = date_unit
6161
self.default_handler = default_handler
6262

63-
self.is_copy = False
63+
self.is_copy = None
6464
self._format_axes()
6565

6666
def _format_axes(self):

pandas/tests/test_index.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1380,10 +1380,10 @@ def test_set_value_keeps_names(self):
13801380
columns=['one', 'two', 'three', 'four'],
13811381
index=idx)
13821382
df = df.sortlevel()
1383-
self.assert_(df.is_copy is False)
1383+
self.assert_(df.is_copy is None)
13841384
self.assertEqual(df.index.names, ('Name', 'Number'))
13851385
df = df.set_value(('grethe', '4'), 'one', 99.34)
1386-
self.assert_(df.is_copy is False)
1386+
self.assert_(df.is_copy is None)
13871387
self.assertEqual(df.index.names, ('Name', 'Number'))
13881388

13891389
def test_names(self):

pandas/tests/test_indexing.py

+30-13
Original file line numberDiff line numberDiff line change
@@ -2022,19 +2022,19 @@ def test_detect_chained_assignment(self):
20222022
# work with the chain
20232023
expected = DataFrame([[-5,1],[-6,3]],columns=list('AB'))
20242024
df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64')
2025-
self.assert_(not df.is_copy)
2025+
self.assert_(df.is_copy is None)
20262026

20272027
df['A'][0] = -5
20282028
df['A'][1] = -6
20292029
assert_frame_equal(df, expected)
20302030

20312031
expected = DataFrame([[-5,2],[np.nan,3.]],columns=list('AB'))
20322032
df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)})
2033-
self.assert_(not df.is_copy)
2033+
self.assert_(df.is_copy is None)
20342034
df['A'][0] = -5
20352035
df['A'][1] = np.nan
20362036
assert_frame_equal(df, expected)
2037-
self.assert_(not df['A'].is_copy)
2037+
self.assert_(df['A'].is_copy is None)
20382038

20392039
# using a copy (the chain), fails
20402040
df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)})
@@ -2046,7 +2046,7 @@ def f():
20462046
df = DataFrame({'a' : ['one', 'one', 'two',
20472047
'three', 'two', 'one', 'six'],
20482048
'c' : Series(range(7),dtype='int64') })
2049-
self.assert_(not df.is_copy)
2049+
self.assert_(df.is_copy is None)
20502050
expected = DataFrame({'a' : ['one', 'one', 'two',
20512051
'three', 'two', 'one', 'six'],
20522052
'c' : [42,42,2,3,4,42,6]})
@@ -2075,7 +2075,7 @@ def f():
20752075
# make sure that is_copy is picked up reconstruction
20762076
# GH5475
20772077
df = DataFrame({"A": [1,2]})
2078-
self.assert_(df.is_copy is False)
2078+
self.assert_(df.is_copy is None)
20792079
with tm.ensure_clean('__tmp__pickle') as path:
20802080
df.to_pickle(path)
20812081
df2 = pd.read_pickle(path)
@@ -2100,33 +2100,42 @@ def random_text(nobs=100):
21002100

21012101
# always a copy
21022102
x = df.iloc[[0,1,2]]
2103-
self.assert_(x.is_copy is True)
2103+
self.assert_(x.is_copy is not None)
21042104
x = df.iloc[[0,1,2,4]]
2105-
self.assert_(x.is_copy is True)
2105+
self.assert_(x.is_copy is not None)
21062106

21072107
# explicity copy
21082108
indexer = df.letters.apply(lambda x : len(x) > 10)
21092109
df = df.ix[indexer].copy()
2110-
self.assert_(df.is_copy is False)
2110+
self.assert_(df.is_copy is None)
21112111
df['letters'] = df['letters'].apply(str.lower)
21122112

21132113
# implicity take
21142114
df = random_text(100000)
21152115
indexer = df.letters.apply(lambda x : len(x) > 10)
21162116
df = df.ix[indexer]
2117-
self.assert_(df.is_copy is True)
2117+
self.assert_(df.is_copy is not None)
2118+
df['letters'] = df['letters'].apply(str.lower)
2119+
2120+
# implicity take 2
2121+
df = random_text(100000)
2122+
indexer = df.letters.apply(lambda x : len(x) > 10)
2123+
df = df.ix[indexer]
2124+
self.assert_(df.is_copy is not None)
21182125
df.loc[:,'letters'] = df['letters'].apply(str.lower)
21192126

2120-
# this will raise
2121-
#df['letters'] = df['letters'].apply(str.lower)
2127+
# should be ok even though its a copy!
2128+
self.assert_(df.is_copy is None)
2129+
df['letters'] = df['letters'].apply(str.lower)
2130+
self.assert_(df.is_copy is None)
21222131

21232132
df = random_text(100000)
21242133
indexer = df.letters.apply(lambda x : len(x) > 10)
21252134
df.ix[indexer,'letters'] = df.ix[indexer,'letters'].apply(str.lower)
21262135

21272136
# an identical take, so no copy
21282137
df = DataFrame({'a' : [1]}).dropna()
2129-
self.assert_(df.is_copy is False)
2138+
self.assert_(df.is_copy is None)
21302139
df['a'] += 1
21312140

21322141
# inplace ops
@@ -2165,7 +2174,15 @@ def f():
21652174
df[['c']][mask] = df[['b']][mask]
21662175
self.assertRaises(com.SettingWithCopyError, f)
21672176

2168-
pd.set_option('chained_assignment','warn')
2177+
# false positives GH6025
2178+
df = DataFrame ({'column1':['a', 'a', 'a'], 'column2': [4,8,9] })
2179+
str(df)
2180+
df['column1'] = df['column1'] + 'b'
2181+
str(df)
2182+
df = df [df['column2']!=8]
2183+
str(df)
2184+
df['column1'] = df['column1'] + 'c'
2185+
str(df)
21692186

21702187
def test_float64index_slicing_bug(self):
21712188
# GH 5557, related to slicing a float index

pandas/tools/tests/test_pivot.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']):
174174
exp = self.data.groupby(rows)[col].mean()
175175
tm.assert_series_equal(cmarg, exp)
176176

177-
res.sortlevel(inplace=True)
177+
res = res.sortlevel()
178178
rmarg = res.xs(('All', ''))[:-1]
179179
exp = self.data.groupby(cols)[col].mean()
180180
tm.assert_series_equal(rmarg, exp)

0 commit comments

Comments
 (0)