Skip to content

Commit b74c420

Browse files
committed
BUG: reset setitem_copy on object enlargement
TST: eliminate SettingWithCopyWarnings in tests (catch them); TST: tests for GH5597; BUG: don't set copy on equal indexes after an operation
1 parent 4264e99 commit b74c420

File tree

5 files changed

+73
-11
lines changed

5 files changed

+73
-11
lines changed

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1563,7 +1563,7 @@ def _ixs(self, i, axis=0, copy=False):
15631563

15641564
# a location index by definition
15651565
i = _maybe_convert_indices(i, len(self._get_axis(axis)))
1566-
return self.reindex(i, takeable=True)
1566+
return self.reindex(i, takeable=True)._setitem_copy(True)
15671567
else:
15681568
new_values, copy = self._data.fast_2d_xs(i, copy=copy)
15691569
return Series(new_values, index=self.columns,
@@ -2714,7 +2714,7 @@ def trans(v):
27142714

27152715
self._clear_item_cache()
27162716
else:
2717-
return self.take(indexer, axis=axis, convert=False)
2717+
return self.take(indexer, axis=axis, convert=False, is_copy=False)
27182718

27192719
def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
27202720
"""
@@ -2760,7 +2760,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
27602760

27612761
self._clear_item_cache()
27622762
else:
2763-
return self.take(indexer, axis=axis, convert=False)
2763+
return self.take(indexer, axis=axis, convert=False, is_copy=False)
27642764

27652765
def swaplevel(self, i, j, axis=0):
27662766
"""

pandas/core/generic.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1064,7 +1064,7 @@ def __delitem__(self, key):
10641064
except KeyError:
10651065
pass
10661066

1067-
def take(self, indices, axis=0, convert=True):
1067+
def take(self, indices, axis=0, convert=True, is_copy=True):
10681068
"""
10691069
Analogous to ndarray.take
10701070
@@ -1073,6 +1073,7 @@ def take(self, indices, axis=0, convert=True):
10731073
indices : list / array of ints
10741074
axis : int, default 0
10751075
convert : translate neg to pos indices (default)
1076+
is_copy : mark the returned frame as a copy
10761077
10771078
Returns
10781079
-------
@@ -1090,12 +1091,17 @@ def take(self, indices, axis=0, convert=True):
10901091
labels = self._get_axis(axis)
10911092
new_items = labels.take(indices)
10921093
new_data = self._data.reindex_axis(new_items, indexer=indices,
1093-
axis=0)
1094+
axis=baxis)
10941095
else:
1095-
new_data = self._data.take(indices, axis=baxis, verify=convert)
1096-
return self._constructor(new_data)\
1097-
._setitem_copy(True)\
1098-
.__finalize__(self)
1096+
new_data = self._data.take(indices, axis=baxis)
1097+
1098+
result = self._constructor(new_data).__finalize__(self)
1099+
1100+
# only mark the result as a copy if the take actually changed the index
1101+
if is_copy and not result._get_axis(axis).equals(self._get_axis(axis)):
1102+
result = result._setitem_copy(is_copy)
1103+
1104+
return result
10991105

11001106
# TODO: Check if this was clearer in 0.12
11011107
def select(self, crit, axis=0):

pandas/core/indexing.py

+1
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def _setitem_with_indexer(self, indexer, value):
209209
labels = _safe_append_to_index(index, key)
210210
self.obj._data = self.obj.reindex_axis(labels, i)._data
211211
self.obj._maybe_update_cacher(clear=True)
212+
self.obj._setitem_copy(False)
212213

213214
if isinstance(labels, MultiIndex):
214215
self.obj.sortlevel(inplace=True)

pandas/tests/test_index.py

+2
Original file line numberDiff line numberDiff line change
@@ -1381,8 +1381,10 @@ def test_set_value_keeps_names(self):
13811381
columns=['one', 'two', 'three', 'four'],
13821382
index=idx)
13831383
df = df.sortlevel()
1384+
self.assert_(df._is_copy is False)
13841385
self.assertEqual(df.index.names, ('Name', 'Number'))
13851386
df = df.set_value(('grethe', '4'), 'one', 99.34)
1387+
self.assert_(df._is_copy is False)
13861388
self.assertEqual(df.index.names, ('Name', 'Number'))
13871389

13881390
def test_names(self):

pandas/tests/test_indexing.py

+55-2
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,8 @@ def f(name,df2):
10491049
return Series(np.arange(df2.shape[0]),name=df2.index.values[0]).reindex(f_index)
10501050
new_df = pd.concat([ f(name,df2) for name, df2 in grp ],axis=1).T
10511051

1052+
# we are actually operating on a copy here
1053+
# but in this case, that's ok
10521054
for name, df2 in grp:
10531055
new_vals = np.arange(df2.shape[0])
10541056
df.ix[name, 'new_col'] = new_vals
@@ -1769,7 +1771,8 @@ def f():
17691771
'c' : [42,42,2,3,4,42,6]})
17701772

17711773
def f():
1772-
df[df.a.str.startswith('o')]['c'] = 42
1774+
indexer = df.a.str.startswith('o')
1775+
df[indexer]['c'] = 42
17731776
self.assertRaises(com.SettingWithCopyError, f)
17741777
df['c'][df.a.str.startswith('o')] = 42
17751778
assert_frame_equal(df,expected)
@@ -1785,7 +1788,8 @@ def f():
17851788
# warnings
17861789
pd.set_option('chained_assignment','warn')
17871790
df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]})
1788-
df.loc[0]['A'] = 111
1791+
with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning):
1792+
df.loc[0]['A'] = 111
17891793

17901794
# make sure that _is_copy is picked up on reconstruction
17911795
# GH5475
@@ -1797,6 +1801,55 @@ def f():
17971801
df2["B"] = df2["A"]
17981802
df2["B"] = df2["A"]
17991803

1804+
# a spurious raise as we are setting the entire column here
1805+
# GH5597
1806+
pd.set_option('chained_assignment','raise')
1807+
from string import ascii_letters as letters
1808+
1809+
def random_text(nobs=100):
1810+
df = []
1811+
for i in range(nobs):
1812+
idx= np.random.randint(len(letters), size=2)
1813+
idx.sort()
1814+
df.append([letters[idx[0]:idx[1]]])
1815+
1816+
return DataFrame(df, columns=['letters'])
1817+
1818+
df = random_text(100000)
1819+
1820+
# always a copy
1821+
x = df.iloc[[0,1,2]]
1822+
self.assert_(x._is_copy is True)
1823+
x = df.iloc[[0,1,2,4]]
1824+
self.assert_(x._is_copy is True)
1825+
1826+
# explicitly copy
1827+
indexer = df.letters.apply(lambda x : len(x) > 10)
1828+
df = df.ix[indexer].copy()
1829+
self.assert_(df._is_copy is False)
1830+
df['letters'] = df['letters'].apply(str.lower)
1831+
1832+
# implicitly take
1833+
df = random_text(100000)
1834+
indexer = df.letters.apply(lambda x : len(x) > 10)
1835+
df = df.ix[indexer]
1836+
self.assert_(df._is_copy is True)
1837+
df.loc[:,'letters'] = df['letters'].apply(str.lower)
1838+
1839+
# this will raise
1840+
#df['letters'] = df['letters'].apply(str.lower)
1841+
1842+
df = random_text(100000)
1843+
indexer = df.letters.apply(lambda x : len(x) > 10)
1844+
df.ix[indexer,'letters'] = df.ix[indexer,'letters'].apply(str.lower)
1845+
1846+
# an identical take, so no copy
1847+
df = DataFrame({'a' : [1]}).dropna()
1848+
self.assert_(df._is_copy is False)
1849+
df['a'] += 1
1850+
1851+
pd.set_option('chained_assignment','warn')
1852+
18001853
def test_float64index_slicing_bug(self):
18011854
# GH 5557, related to slicing a float index
18021855
ser = {256: 2321.0, 1: 78.0, 2: 2716.0, 3: 0.0, 4: 369.0, 5: 0.0, 6: 269.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 3536.0, 11: 0.0, 12: 24.0, 13: 0.0, 14: 931.0, 15: 0.0, 16: 101.0, 17: 78.0, 18: 9643.0, 19: 0.0, 20: 0.0, 21: 0.0, 22: 63761.0, 23: 0.0, 24: 446.0, 25: 0.0, 26: 34773.0, 27: 0.0, 28: 729.0, 29: 78.0, 30: 0.0, 31: 0.0, 32: 3374.0, 33: 0.0, 34: 1391.0, 35: 0.0, 36: 361.0, 37: 0.0, 38: 61808.0, 39: 0.0, 40: 0.0, 41: 0.0, 42: 6677.0, 43: 0.0, 44: 802.0, 45: 0.0, 46: 2691.0, 47: 0.0, 48: 3582.0, 49: 0.0, 50: 734.0, 51: 0.0, 52: 627.0, 53: 70.0, 54: 2584.0, 55: 0.0, 56: 324.0, 57: 0.0, 58: 605.0, 59: 0.0, 60: 0.0, 61: 0.0, 62: 3989.0, 63: 10.0, 64: 42.0, 65: 0.0, 66: 904.0, 67: 0.0, 68: 88.0, 69: 70.0, 70: 8172.0, 71: 0.0, 72: 0.0, 73: 0.0, 74: 64902.0, 75: 0.0, 76: 347.0, 77: 0.0, 78: 36605.0, 79: 0.0, 80: 379.0, 81: 70.0, 82: 0.0, 83: 0.0, 84: 3001.0, 85: 0.0, 86: 1630.0, 87: 7.0, 88: 364.0, 89: 0.0, 90: 67404.0, 91: 9.0, 92: 0.0, 93: 0.0, 94: 7685.0, 95: 0.0, 96: 1017.0, 97: 0.0, 98: 2831.0, 99: 0.0, 100: 2963.0, 101: 0.0, 102: 854.0, 103: 0.0, 104: 0.0, 105: 0.0, 106: 0.0, 107: 0.0, 108: 0.0, 109: 0.0, 110: 0.0, 111: 0.0, 112: 0.0, 113: 0.0, 114: 0.0, 115: 0.0, 116: 0.0, 117: 0.0, 118: 0.0, 119: 0.0, 120: 0.0, 121: 0.0, 122: 0.0, 123: 0.0, 124: 0.0, 125: 0.0, 126: 67744.0, 127: 22.0, 128: 264.0, 129: 0.0, 260: 197.0, 268: 0.0, 265: 0.0, 269: 0.0, 261: 0.0, 266: 1198.0, 267: 0.0, 262: 2629.0, 258: 775.0, 257: 0.0, 263: 0.0, 259: 0.0, 264: 163.0, 250: 10326.0, 251: 0.0, 252: 1228.0, 253: 0.0, 254: 2769.0, 255: 0.0}

0 commit comments

Comments
 (0)