Skip to content

Commit ec85371

Browse files
committed
Review (jreback)
1 parent f46c1e4 commit ec85371

File tree

3 files changed

+54
-53
lines changed

3 files changed

+54
-53
lines changed

pandas/core/frame.py

+3
Original file line numberDiff line numberDiff line change
@@ -3978,13 +3978,16 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
39783978
if not isinstance(keys, list):
39793979
keys = [keys]
39803980

3981+
# collect the elements of "keys" that are not one of the allowed array types; these are treated as column labels
39813982
col_labels = [x for x in keys
39823983
if not isinstance(x, (Series, Index, MultiIndex,
39833984
list, np.ndarray))]
39843985
if any(x not in self for x in col_labels):
3986+
# if there are any invalid labels for self, we raise a KeyError
39853987
missing = [x for x in col_labels if x not in self]
39863988
raise KeyError('{}'.format(missing))
39873989
elif len(set(col_labels)) < len(col_labels):
3990+
# all labels are valid, but some appear more than once, so we raise a ValueError
39883991
dup = Series(col_labels)
39893992
dup = list(dup.loc[dup.duplicated()])
39903993
raise ValueError('Passed duplicate column names '

pandas/tests/frame/common.py

-9
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,6 @@ def simple(self):
103103
return pd.DataFrame(arr, columns=['one', 'two', 'three'],
104104
index=['a', 'b', 'c'])
105105

106-
@cache_readonly
107-
def dummy(self):
108-
df = pd.DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
109-
'B': ['one', 'two', 'three', 'one', 'two'],
110-
'C': ['a', 'b', 'c', 'd', 'e'],
111-
'D': np.random.randn(5),
112-
'E': np.random.randn(5)})
113-
return df
114-
115106
# self.ts3 = tm.makeTimeSeries()[-5:]
116107
# self.ts4 = tm.makeTimeSeries()[1:-1]
117108

pandas/tests/frame/test_alter_axes.py

+51-44
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,16 @@
2323
from pandas.tests.frame.common import TestData
2424

2525

26+
@pytest.fixture
27+
def frame_of_index_cols():
28+
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
29+
'B': ['one', 'two', 'three', 'one', 'two'],
30+
'C': ['a', 'b', 'c', 'd', 'e'],
31+
'D': np.random.randn(5),
32+
'E': np.random.randn(5)})
33+
return df
34+
35+
2636
class TestDataFrameAlterAxes(TestData):
2737

2838
def test_set_index_directly(self):
@@ -54,8 +64,9 @@ def test_set_index_cast(self):
5464
@pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']])
5565
@pytest.mark.parametrize('inplace', [True, False])
5666
@pytest.mark.parametrize('drop', [True, False])
57-
def test_set_index_drop_inplace(self, drop, inplace, keys):
58-
df = self.dummy.copy()
67+
def test_set_index_drop_inplace(self, frame_of_index_cols,
68+
drop, inplace, keys):
69+
df = frame_of_index_cols
5970

6071
if isinstance(keys, list):
6172
idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys)
@@ -75,8 +86,8 @@ def test_set_index_drop_inplace(self, drop, inplace, keys):
7586
# A has duplicate values, C does not
7687
@pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']])
7788
@pytest.mark.parametrize('drop', [True, False])
78-
def test_set_index_append(self, drop, keys):
79-
df = self.dummy.copy()
89+
def test_set_index_append(self, frame_of_index_cols, drop, keys):
90+
df = frame_of_index_cols
8091

8192
keys = keys if isinstance(keys, list) else [keys]
8293
idx = MultiIndex.from_arrays([df.index] + [df[x] for x in keys],
@@ -91,12 +102,14 @@ def test_set_index_append(self, drop, keys):
91102
# A has duplicate values, C does not
92103
@pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']])
93104
@pytest.mark.parametrize('drop', [True, False])
94-
def test_set_index_append_to_multiindex(self, drop, keys):
105+
def test_set_index_append_to_multiindex(self, frame_of_index_cols,
106+
drop, keys):
95107
# append to existing multiindex
96-
df = self.dummy.set_index(['D'], drop=drop, append=True)
108+
df = frame_of_index_cols.set_index(['D'], drop=drop, append=True)
97109

98110
keys = keys if isinstance(keys, list) else [keys]
99-
expected = self.dummy.set_index(['D'] + keys, drop=drop, append=True)
111+
expected = frame_of_index_cols.set_index(['D'] + keys,
112+
drop=drop, append=True)
100113

101114
result = df.set_index(keys, drop=drop, append=True)
102115

@@ -112,19 +125,18 @@ def test_set_index_after_mutation(self):
112125
result = df2.set_index('key')
113126
tm.assert_frame_equal(result, expected)
114127

128+
# the MultiIndex constructor does not work directly on a Series, so that box is a lambda calling MultiIndex.from_arrays([x])
115129
# also test index name if append=True (name is duplicate here for B)
116-
@pytest.mark.parametrize('box', [Series, Index, np.array, 'MultiIndex'])
130+
@pytest.mark.parametrize('box', [Series, Index, np.array,
131+
lambda x: MultiIndex.from_arrays([x])])
117132
@pytest.mark.parametrize('append, index_name', [(True, None),
118133
(True, 'B'), (True, 'test'), (False, None)])
119134
@pytest.mark.parametrize('drop', [True, False])
120-
def test_set_index_pass_single_array(self, drop, append, index_name, box):
121-
df = self.dummy.copy()
135+
def test_set_index_pass_single_array(self, frame_of_index_cols,
136+
drop, append, index_name, box):
137+
df = frame_of_index_cols
122138
df.index.name = index_name
123139

124-
# update constructor in case of MultiIndex
125-
box = ((lambda x: MultiIndex.from_arrays([x]))
126-
if box == 'MultiIndex' else box)
127-
128140
key = box(df['B'])
129141
# np.array and list "forget" the name of B
130142
name = [None if box in [np.array, list] else 'B']
@@ -138,21 +150,19 @@ def test_set_index_pass_single_array(self, drop, append, index_name, box):
138150

139151
tm.assert_frame_equal(result, expected)
140152

153+
# the MultiIndex constructor does not work directly on a Series, so that box is a lambda calling MultiIndex.from_arrays([x])
141154
# also test index name if append=True (name is duplicate here for A & B)
142-
@pytest.mark.parametrize('box', [Series, Index, np.array,
143-
list, 'MultiIndex'])
155+
@pytest.mark.parametrize('box', [Series, Index, np.array, list,
156+
lambda x: MultiIndex.from_arrays([x])])
144157
@pytest.mark.parametrize('append, index_name',
145158
[(True, None), (True, 'A'), (True, 'B'),
146159
(True, 'test'), (False, None)])
147160
@pytest.mark.parametrize('drop', [True, False])
148-
def test_set_index_pass_arrays(self, drop, append, index_name, box):
149-
df = self.dummy.copy()
161+
def test_set_index_pass_arrays(self, frame_of_index_cols,
162+
drop, append, index_name, box):
163+
df = frame_of_index_cols
150164
df.index.name = index_name
151165

152-
# update constructor in case of MultiIndex
153-
box = ((lambda x: MultiIndex.from_arrays([x]))
154-
if box == 'MultiIndex' else box)
155-
156166
keys = ['A', box(df['B'])]
157167
# np.array and list "forget" the name of B
158168
names = ['A', None if box in [np.array, list] else 'B']
@@ -167,28 +177,24 @@ def test_set_index_pass_arrays(self, drop, append, index_name, box):
167177

168178
tm.assert_frame_equal(result, expected)
169179

180+
# the MultiIndex constructor does not work directly on a Series, so that box is a lambda calling MultiIndex.from_arrays([x])
181+
# a lambda returning x.name likewise emulates a "constructor" for a plain column label
170182
# also test index name if append=True (name is duplicate here for A)
171-
@pytest.mark.parametrize('box1', ['label', Series, Index, np.array,
172-
list, 'MultiIndex'])
173-
@pytest.mark.parametrize('box2', ['label', Series, Index, np.array,
174-
list, 'MultiIndex'])
183+
@pytest.mark.parametrize('box2', [Series, Index, np.array, list,
184+
lambda x: MultiIndex.from_arrays([x]),
185+
lambda x: x.name])
186+
@pytest.mark.parametrize('box1', [Series, Index, np.array, list,
187+
lambda x: MultiIndex.from_arrays([x]),
188+
lambda x: x.name])
175189
@pytest.mark.parametrize('append, index_name', [(True, None),
176190
(True, 'A'), (True, 'test'), (False, None)])
177191
@pytest.mark.parametrize('drop', [True, False])
178-
def test_set_index_pass_arrays_duplicate(self, drop, append, index_name,
179-
box1, box2):
180-
df = self.dummy.copy()
192+
def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
193+
append, index_name, box1, box2):
194+
df = frame_of_index_cols
181195
df.index.name = index_name
182196

183-
# transform strings to correct box constructor
184-
def rebox(x):
185-
if x == 'label':
186-
return lambda x: x.name
187-
elif x == 'MultiIndex':
188-
return lambda x: MultiIndex.from_arrays([x])
189-
return x
190-
191-
keys = [rebox(box1)(df['A']), rebox(box2)(df['A'])]
197+
keys = [box1(df['A']), box2(df['A'])]
192198

193199
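# == on a Series is element-wise and gives an ambiguous Boolean, hence the identity checks; NOTE(review): "is" against a str literal relies on string interning -- consider isinstance(key, str) and key == 'A' instead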
194200
if keys[0] is 'A' and keys[1] is 'A':
@@ -208,8 +214,9 @@ def rebox(x):
208214

209215
@pytest.mark.parametrize('append', [True, False])
210216
@pytest.mark.parametrize('drop', [True, False])
211-
def test_set_index_pass_multiindex(self, drop, append):
212-
df = self.dummy.copy()
217+
def test_set_index_pass_multiindex(self, frame_of_index_cols,
218+
drop, append):
219+
df = frame_of_index_cols
213220
keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B'])
214221

215222
result = df.set_index(keys, drop=drop, append=append)
@@ -219,8 +226,8 @@ def test_set_index_pass_multiindex(self, drop, append):
219226

220227
tm.assert_frame_equal(result, expected)
221228

222-
def test_set_index_verify_integrity(self):
223-
df = self.dummy.copy()
229+
def test_set_index_verify_integrity(self, frame_of_index_cols):
230+
df = frame_of_index_cols
224231

225232
with tm.assert_raises_regex(ValueError,
226233
'Index has duplicate keys'):
@@ -230,8 +237,8 @@ def test_set_index_verify_integrity(self):
230237
'Index has duplicate keys'):
231238
df.set_index([df['A'], df['A']], verify_integrity=True)
232239

233-
def test_set_index_raise(self):
234-
df = self.dummy.copy()
240+
def test_set_index_raise(self, frame_of_index_cols):
241+
df = frame_of_index_cols
235242

236243
with tm.assert_raises_regex(KeyError, '.*'): # column names are A-E
237244
df.set_index(['foo', 'bar', 'baz'], verify_integrity=True)

0 commit comments

Comments
 (0)