Skip to content

Commit 3f7d2db

Browse files
committed
TST: split up tests/indexing/test_indexing a bit
Author: Jeff Reback <[email protected]> Closes pandas-dev#15367 from jreback/indexing and squashes the following commits: 15e6010 [Jeff Reback] pep 3a12fdd [Jeff Reback] add panel 5605b2b [Jeff Reback] add chaining and caching 05f6f40 [Jeff Reback] split out datetime d6be34f [Jeff Reback] TST: split up tests/indexing/test_indexing a bit
1 parent 61deba5 commit 3f7d2db

8 files changed

+2215
-2175
lines changed

pandas/tests/indexing/common.py

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
""" common utilities """
2+
3+
4+
def _mklbl(prefix, n):
5+
return ["%s%s" % (prefix, i) for i in range(n)]
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,358 @@
1+
import numpy as np
2+
import pandas as pd
3+
from pandas.core import common as com
4+
from pandas import (compat, DataFrame, option_context,
5+
Series, MultiIndex, date_range, Timestamp)
6+
from pandas.util import testing as tm
7+
8+
9+
class TestCaching(tm.TestCase):
    """Tests that cached column references stay in sync after setitem."""

    def test_slice_consolidate_invalidate_item_cache(self):
        """A value set through a cached column is visible on the frame."""

        # this is chained assignment, but will 'work'
        with option_context('chained_assignment', None):

            # #3970
            df = DataFrame({"aa": compat.lrange(5), "bb": [2.2] * 5})

            # Creates a second float block
            df["cc"] = 0.0

            # caches a reference to the 'bb' series
            df["bb"]

            # repr machinery triggers consolidation
            repr(df)

            # Assignment to wrong series
            df['bb'].iloc[0] = 0.17
            df._clear_item_cache()
            self.assertAlmostEqual(df['bb'][0], 0.17)

    def test_setitem_cache_updating(self):
        """Setting values (enlargement, slices) must refresh cached columns."""
        # GH 5424
        cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven']

        # run once with a prior lookup ("ref the cache") and once without,
        # so that both code paths are actually exercised
        for do_ref in [True, False]:
            df = DataFrame({'a': cont,
                            "b": cont[3:] + cont[:3],
                            'c': np.arange(7)})

            # ref the cache
            if do_ref:
                df.ix[0, "c"]

            # set it (label 7 is not in the 7-row frame, so this enlarges)
            df.ix[7, 'c'] = 1

            self.assertEqual(df.ix[0, 'c'], 0.0)
            self.assertEqual(df.ix[7, 'c'], 1.0)

        # GH 7084
        # not updating cache on series setting with slices
        expected = DataFrame({'A': [600, 600, 600]},
                             index=date_range('5/7/2014', '5/9/2014'))
        out = DataFrame({'A': [0, 0, 0]},
                        index=date_range('5/7/2014', '5/9/2014'))
        df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]})

        # loop through df to update out
        six = Timestamp('5/7/2014')
        eix = Timestamp('5/9/2014')
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D']

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # try via a chain indexing
        # this actually works
        out = DataFrame({'A': [0, 0, 0]},
                        index=date_range('5/7/2014', '5/9/2014'))
        for _, row in df.iterrows():
            v = out[row['C']][six:eix] + row['D']
            out[row['C']][six:eix] = v

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])

        # same update again, this time through an in-place ``+=`` on .loc
        out = DataFrame({'A': [0, 0, 0]},
                        index=date_range('5/7/2014', '5/9/2014'))
        for _, row in df.iterrows():
            out.loc[six:eix, row['C']] += row['D']

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out['A'], expected['A'])
87+
88+
89+
class TestChaining(tm.TestCase):
    """Tests for chained-indexing assignment: results, errors, warnings."""

    def test_setitem_chained_setfault(self):
        """Chained mask / iloc assignment must update the parent frame."""

        # GH6026
        # setfaults under numpy 1.7.1 (ok on 1.8)
        data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout']
        mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none']

        df = DataFrame({'response': np.array(data)})
        mask = df.response == 'timeout'
        df.response[mask] = 'none'
        tm.assert_frame_equal(df, DataFrame({'response': mdata}))

        recarray = np.rec.fromarrays([data], names=['response'])
        df = DataFrame(recarray)
        mask = df.response == 'timeout'
        df.response[mask] = 'none'
        tm.assert_frame_equal(df, DataFrame({'response': mdata}))

        df = DataFrame({'response': data, 'response1': data})
        mask = df.response == 'timeout'
        df.response[mask] = 'none'
        tm.assert_frame_equal(df, DataFrame({'response': mdata,
                                             'response1': data}))

        # GH 6056
        expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar']))
        df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
        df['A'].iloc[0] = np.nan
        result = df.head()
        tm.assert_frame_equal(result, expected)

        df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar'])))
        df.A.iloc[0] = np.nan
        result = df.head()
        tm.assert_frame_equal(result, expected)

    def test_detect_chained_assignment(self):
        """Chained assignment raises (or not) as expected in 'raise' mode."""

        # Scope the option to this test so the global setting is restored
        # on exit instead of leaking into whatever test runs next.
        with option_context('chained_assignment', 'raise'):

            # work with the chain
            expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
            df = DataFrame(np.arange(4).reshape(2, 2),
                           columns=list('AB'), dtype='int64')
            self.assertIsNone(df.is_copy)
            df['A'][0] = -5
            df['A'][1] = -6
            tm.assert_frame_equal(df, expected)

            # test with the chaining
            df = DataFrame({'A': Series(range(2), dtype='int64'),
                            'B': np.array(np.arange(2, 4), dtype=np.float64)})
            self.assertIsNone(df.is_copy)

            def f():
                df['A'][0] = -5

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df['A'][1] = np.nan

            self.assertRaises(com.SettingWithCopyError, f)
            self.assertIsNone(df['A'].is_copy)

            # using a copy (the chain), fails
            df = DataFrame({'A': Series(range(2), dtype='int64'),
                            'B': np.array(np.arange(2, 4), dtype=np.float64)})

            def f():
                df.loc[0]['A'] = -5

            self.assertRaises(com.SettingWithCopyError, f)

            # doc example
            df = DataFrame({'a': ['one', 'one', 'two', 'three',
                                  'two', 'one', 'six'],
                            'c': Series(range(7), dtype='int64')})
            self.assertIsNone(df.is_copy)
            # NOTE: this `expected` is never compared; it is rebound below
            # before any assertion uses it.
            expected = DataFrame({'a': ['one', 'one', 'two', 'three',
                                        'two', 'one', 'six'],
                                  'c': [42, 42, 2, 3, 4, 42, 6]})

            def f():
                indexer = df.a.str.startswith('o')
                df[indexer]['c'] = 42

            self.assertRaises(com.SettingWithCopyError, f)

            expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
            df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})

            def f():
                df['A'][0] = 111

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df.loc[0]['A'] = 111

            self.assertRaises(com.SettingWithCopyError, f)

            df.loc[0, 'A'] = 111
            tm.assert_frame_equal(df, expected)

            # make sure that is_copy is picked up reconstruction
            # GH5475
            df = DataFrame({"A": [1, 2]})
            self.assertIsNone(df.is_copy)
            with tm.ensure_clean('__tmp__pickle') as path:
                df.to_pickle(path)
                df2 = pd.read_pickle(path)
                df2["B"] = df2["A"]
                df2["B"] = df2["A"]

            # a spurious raise as we are setting the entire column here
            # GH5597
            from string import ascii_letters as letters

            def random_text(nobs=100):
                df = []
                for i in range(nobs):
                    idx = np.random.randint(len(letters), size=2)
                    idx.sort()
                    df.append([letters[idx[0]:idx[1]]])

                return DataFrame(df, columns=['letters'])

            df = random_text(100000)

            # always a copy
            x = df.iloc[[0, 1, 2]]
            self.assertIsNotNone(x.is_copy)
            x = df.iloc[[0, 1, 2, 4]]
            self.assertIsNotNone(x.is_copy)

            # explicitly copy
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.ix[indexer].copy()
            self.assertIsNone(df.is_copy)
            df['letters'] = df['letters'].apply(str.lower)

            # implicitly take
            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.ix[indexer]
            self.assertIsNotNone(df.is_copy)
            df['letters'] = df['letters'].apply(str.lower)

            # implicitly take 2
            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df = df.ix[indexer]
            self.assertIsNotNone(df.is_copy)
            df.loc[:, 'letters'] = df['letters'].apply(str.lower)

            # should be ok even though it's a copy!
            self.assertIsNone(df.is_copy)
            df['letters'] = df['letters'].apply(str.lower)
            self.assertIsNone(df.is_copy)

            df = random_text(100000)
            indexer = df.letters.apply(lambda x: len(x) > 10)
            df.ix[indexer, 'letters'] = (
                df.ix[indexer, 'letters'].apply(str.lower))

            # an identical take, so no copy
            df = DataFrame({'a': [1]}).dropna()
            self.assertIsNone(df.is_copy)
            df['a'] += 1

            # inplace ops
            # original from:
            # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
            a = [12, 23]
            b = [123, None]
            c = [1234, 2345]
            d = [12345, 23456]
            tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
                      ('ears', 'right')]
            events = {('eyes', 'left'): a,
                      ('eyes', 'right'): b,
                      ('ears', 'left'): c,
                      ('ears', 'right'): d}
            multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
            zed = DataFrame(events, index=['a', 'b'], columns=multiind)

            def f():
                zed['eyes']['right'].fillna(value=555, inplace=True)

            self.assertRaises(com.SettingWithCopyError, f)

            df = DataFrame(np.random.randn(10, 4))
            s = df.iloc[:, 0].sort_values()
            tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
            tm.assert_series_equal(s, df[0].sort_values())

            # false positives GH6025
            df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]})
            str(df)
            df['column1'] = df['column1'] + 'b'
            str(df)
            df = df[df['column2'] != 8]
            str(df)
            df['column1'] = df['column1'] + 'c'
            str(df)

            # from SO:
            # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
            df = DataFrame(np.arange(0, 9), columns=['count'])
            df['group'] = 'b'

            def f():
                df.iloc[0:5]['group'] = 'a'

            self.assertRaises(com.SettingWithCopyError, f)

            # mixed type setting
            # same dtype & changing dtype
            df = DataFrame(dict(A=date_range('20130101', periods=5),
                                B=np.random.randn(5),
                                C=np.arange(5, dtype='int64'),
                                D=list('abcde')))

            def f():
                df.ix[2]['D'] = 'foo'

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df.ix[2]['C'] = 'foo'

            self.assertRaises(com.SettingWithCopyError, f)

            def f():
                df['C'][2] = 'foo'

            self.assertRaises(com.SettingWithCopyError, f)

    def test_setting_with_copy_bug(self):
        """Assigning into a copy raises; adding a column to a new frame
        does not."""

        # Set 'raise' mode explicitly: the assertions below expect
        # SettingWithCopyError, so the test must not rely on option state
        # left behind by another test.
        with option_context('chained_assignment', 'raise'):

            # operating on a copy
            df = pd.DataFrame({'a': list(range(4)),
                               'b': list('ab..'),
                               'c': ['a', 'b', np.nan, 'd']})
            mask = pd.isnull(df.c)

            def f():
                df[['c']][mask] = df[['b']][mask]

            self.assertRaises(com.SettingWithCopyError, f)

            # invalid warning as we are returning a new object
            # GH 8730
            df1 = DataFrame({'x': Series(['a', 'b', 'c']),
                             'y': Series(['d', 'e', 'f'])})
            df2 = df1[['x']]

            # this should not raise
            df2['y'] = ['g', 'h', 'i']

    def test_detect_chained_assignment_warnings(self):
        """In 'warn' mode a chained assignment emits
        SettingWithCopyWarning."""

        # warnings
        with option_context('chained_assignment', 'warn'):
            df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
            with tm.assert_produces_warning(
                    expected_warning=com.SettingWithCopyWarning):
                df.loc[0]['A'] = 111

0 commit comments

Comments
 (0)