|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +from pandas.core import common as com |
| 4 | +from pandas import (compat, DataFrame, option_context, |
| 5 | + Series, MultiIndex, date_range, Timestamp) |
| 6 | +from pandas.util import testing as tm |
| 7 | + |
| 8 | + |
class TestCaching(tm.TestCase):
    """Regression tests for the DataFrame item cache.

    Each test reads a column (which may cache the resulting Series on the
    frame), mutates the frame, and then checks that subsequent reads see
    the updated values.
    """

    def test_slice_consolidate_invalidate_item_cache(self):

        # this is chained assignment, but will 'work'
        with option_context('chained_assignment', None):

            # GH 3970
            df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5})

            # Creates a second float block
            df["cc"] = 0.0

            # caches a reference to the 'bb' series
            df["bb"]

            # repr machinery triggers consolidation
            repr(df)

            # Assignment to wrong series
            df['bb'].iloc[0] = 0.17
            df._clear_item_cache()
            self.assertAlmostEqual(df['bb'][0], 0.17)

    def test_setitem_cache_updating(self):
        # GH 5424
        cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']

        # Exercise both paths: with and without touching the cache before
        # the assignment.  (The list used to be [False, False], which left
        # the "ref the cache" branch below unreachable.)
        for do_ref in [True, False]:
            df = DataFrame({'a': cont,
                            "b": cont[3:] + cont[:3],
                            'c': np.arange(7)})

            # ref the cache
            if do_ref:
                df.ix[0, "c"]

            # set it (label 7 is not in the index, so this enlarges the frame)
            df.ix[7, 'c'] = 1

            self.assertEqual(df.ix[0, 'c'], 0.0)
            self.assertEqual(df.ix[7, 'c'], 1.0)

        # GH 7084
        # not updating cache on series setting with slices
        def make_out():
            # fresh all-zero accumulator frame for each scenario below
            return DataFrame({'A': [0, 0, 0]},
                             index=date_range('5/7/2014', '5/9/2014'))

        expected = DataFrame({'A': [600, 600, 600]},
                             index=date_range('5/7/2014', '5/9/2014'))
        df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})

        six = Timestamp('5/7/2014')
        eix = Timestamp('5/9/2014')

        # loop through df to update out
        out = make_out()
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] = (out.loc[six:eix, row['C']] +
                                          row['D'])

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # try via a chain indexing
        # this actually works
        out = make_out()
        for _, row in df.iterrows():
            v = out[row['C']][six:eix] + row['D']
            out[row['C']][six:eix] = v

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # same accumulation, written as an in-place add through .loc
        out = make_out()
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] += row['D']

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])
| 87 | + |
| 88 | + |
class TestChaining(tm.TestCase):
    """Tests for chained assignment and SettingWithCopy detection.

    Tests that expect ``SettingWithCopyError`` set the
    ``chained_assignment`` option to ``'raise'`` through ``option_context``
    so the setting is restored when the test finishes and each test is
    independent of execution order.
    """

    def test_setitem_chained_setfault(self):

        # GH6026
        # segfaults under numpy 1.7.1 (ok on 1.8)
        data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
        mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']

        df = DataFrame({'response': np.array(data)})
        mask = df.response == 'timeout'
        df.response[mask] = 'none'
        tm.assert_frame_equal(df, DataFrame({'response': mdata}))

        recarray = np.rec.fromarrays([data], names=['response'])
        df = DataFrame(recarray)
        mask = df.response == 'timeout'
        df.response[mask] = 'none'
        tm.assert_frame_equal(df, DataFrame({'response': mdata}))

        df = DataFrame({'response': data, 'response1': data})
        mask = df.response == 'timeout'
        df.response[mask] = 'none'
        tm.assert_frame_equal(df, DataFrame({'response': mdata,
                                             'response1': data}))

        # GH 6056
        expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))
        df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
        df['A'].iloc[0] = np.nan
        result = df.head()
        tm.assert_frame_equal(result, expected)

        df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
        df.A.iloc[0] = np.nan
        result = df.head()
        tm.assert_frame_equal(result, expected)

    def test_detect_chained_assignment(self):

        # Scope the 'raise' mode to this test; a bare pd.set_option() here
        # would leave the option changed for every test that runs afterwards.
        with option_context('chained_assignment', 'raise'):

            # work with the chain
            expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
            df = DataFrame(np.arange(4).reshape(2, 2),
                           columns=list('AB'), dtype='int64')
            self.assertIsNone(df.is_copy)
            df['A'][0] = -5
            df['A'][1] = -6
            tm.assert_frame_equal(df, expected)

            # test with the chaining
            df = DataFrame({'A': Series(range(2), dtype='int64'),
                            'B': np.array(np.arange(2, 4),
                                          dtype=np.float64)})
            self.assertIsNone(df.is_copy)

            def f():
                df['A'][0] = -5

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df['A'][1] = np.nan

            self.assertRaises(com.SettingWithCopyError, f)
            self.assertIsNone(df['A'].is_copy)

            # using a copy (the chain), fails
            df = DataFrame({'A': Series(range(2), dtype='int64'),
                            'B': np.array(np.arange(2, 4),
                                          dtype=np.float64)})

            def f():
                df.loc[0]['A'] = -5

            self.assertRaises(com.SettingWithCopyError, f)

            # doc example
            df = DataFrame({'a': ['one', 'one', 'two', 'three',
                                  'two', 'one', 'six'],
                            'c': Series(range(7), dtype='int64')})
            self.assertIsNone(df.is_copy)

            def f():
                indexer = df.a.str.startswith('o')
                df[indexer]['c'] = 42

            self.assertRaises(com.SettingWithCopyError, f)

            expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
            df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})

            def f():
                df['A'][0] = 111

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df.loc[0]['A'] = 111

            self.assertRaises(com.SettingWithCopyError, f)

            df.loc[0, 'A'] = 111
            tm.assert_frame_equal(df, expected)

            # make sure that is_copy is picked up on reconstruction
            # GH5475
            df = DataFrame({"A": [1, 2]})
            self.assertIsNone(df.is_copy)
            with tm.ensure_clean('__tmp__pickle') as path:
                df.to_pickle(path)
                df2 = pd.read_pickle(path)
                # assigned twice on purpose: neither assignment may raise
                df2["B"] = df2["A"]
                df2["B"] = df2["A"]

            # a spurious raise as we are setting the entire column here
            # GH5597
            from string import ascii_letters as letters

            def random_text(nobs=100):
                # one-column frame of random slices of the ASCII letters
                rows = []
                for _ in range(nobs):
                    idx = np.random.randint(len(letters), size=2)
                    idx.sort()
                    rows.append([letters[idx[0]:idx[1]]])

                return DataFrame(rows, columns=['letters'])

            df = random_text(100000)

            # always a copy
            x = df.iloc[[0, 1, 2]]
            self.assertIsNotNone(x.is_copy)
            x = df.iloc[[0, 1, 2, 4]]
            self.assertIsNotNone(x.is_copy)

            # explicitly copy
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.ix[indexer].copy()
            self.assertIsNone(df.is_copy)
            df['letters'] = df['letters'].apply(str.lower)

            # implicitly take
            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.ix[indexer]
            self.assertIsNotNone(df.is_copy)
            df['letters'] = df['letters'].apply(str.lower)

            # implicitly take 2
            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.ix[indexer]
            self.assertIsNotNone(df.is_copy)
            df.loc[:, 'letters'] = df['letters'].apply(str.lower)

            # should be ok even though it's a copy!
            self.assertIsNone(df.is_copy)
            df['letters'] = df['letters'].apply(str.lower)
            self.assertIsNone(df.is_copy)

            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df.ix[indexer, 'letters'] = (
                df.ix[indexer, 'letters'].apply(str.lower))

            # an identical take, so no copy
            df = DataFrame({'a': [1]}).dropna()
            self.assertIsNone(df.is_copy)
            df['a'] += 1

            # inplace ops
            # original from:
            # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
            a = [12, 23]
            b = [123, None]
            c = [1234, 2345]
            d = [12345, 23456]
            tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
                      ('ears', 'right')]
            events = {('eyes', 'left'): a,
                      ('eyes', 'right'): b,
                      ('ears', 'left'): c,
                      ('ears', 'right'): d}
            multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
            zed = DataFrame(events, index=['a', 'b'], columns=multiind)

            def f():
                zed['eyes']['right'].fillna(value=555, inplace=True)

            self.assertRaises(com.SettingWithCopyError, f)

            df = DataFrame(np.random.randn(10, 4))
            s = df.iloc[:, 0].sort_values()
            tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
            tm.assert_series_equal(s, df[0].sort_values())

            # false positives GH6025
            df = DataFrame({'column1': ['a', 'a', 'a'],
                            'column2': [4, 8, 9]})
            str(df)
            df['column1'] = df['column1'] + 'b'
            str(df)
            df = df[df['column2'] != 8]
            str(df)
            df['column1'] = df['column1'] + 'c'
            str(df)

            # from SO:
            # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
            df = DataFrame(np.arange(0, 9), columns=['count'])
            df['group'] = 'b'

            def f():
                df.iloc[0:5]['group'] = 'a'

            self.assertRaises(com.SettingWithCopyError, f)

            # mixed type setting
            # same dtype & changing dtype
            df = DataFrame(dict(A=date_range('20130101', periods=5),
                                B=np.random.randn(5),
                                C=np.arange(5, dtype='int64'),
                                D=list('abcde')))

            def f():
                df.ix[2]['D'] = 'foo'

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df.ix[2]['C'] = 'foo'

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df['C'][2] = 'foo'

            self.assertRaises(com.SettingWithCopyError, f)

    def test_setting_with_copy_bug(self):

        # Request 'raise' mode explicitly: the assertRaises below must not
        # depend on another test having changed the global option first.
        with option_context('chained_assignment', 'raise'):

            # operating on a copy
            df = pd.DataFrame({'a': list(range(4)),
                               'b': list('ab..'),
                               'c': ['a', 'b', np.nan, 'd']})
            mask = pd.isnull(df.c)

            def f():
                df[['c']][mask] = df[['b']][mask]

            self.assertRaises(com.SettingWithCopyError, f)

            # invalid warning as we are returning a new object
            # GH 8730
            df1 = DataFrame({'x': Series(['a', 'b', 'c']),
                             'y': Series(['d', 'e', 'f'])})
            df2 = df1[['x']]

            # this should not raise
            df2['y'] = ['g', 'h', 'i']

    def test_detect_chained_assignment_warnings(self):

        # warnings
        with option_context('chained_assignment', 'warn'):
            df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
            with tm.assert_produces_warning(
                    expected_warning=com.SettingWithCopyWarning):
                df.loc[0]['A'] = 111
0 commit comments