Skip to content

Commit 2160b40

Browse files
committed
BUG: let selecting multiple columns in DataFrame.__getitem__ work when there are duplicates. close #1943
1 parent a2f5e56 commit 2160b40

File tree

3 files changed

+25
-8
lines changed

3 files changed

+25
-8
lines changed

RELEASE.rst

+2
Original file line number | Diff line number | Diff line change
@@ -201,6 +201,8 @@ pandas 0.9.0
201201
size cutoff (#1821)
202202
- Handle list keys in addition to tuples in DataFrame.xs when
203203
partial-indexing a hierarchically-indexed DataFrame (#1796)
204+
- Support multiple column selection in DataFrame.__getitem__ with duplicate
205+
columns (#1943)
204206

205207
pandas 0.8.1
206208
============

pandas/core/frame.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -1707,14 +1707,18 @@ def _getitem_array(self, key):
17071707
inds, = key.nonzero()
17081708
return self.take(inds)
17091709
else:
1710-
indexer = self.columns.get_indexer(key)
1711-
mask = indexer == -1
1712-
if mask.any():
1713-
raise KeyError("No column(s) named: %s" % str(key[mask]))
1714-
result = self.reindex(columns=key)
1715-
if result.columns.name is None:
1716-
result.columns.name = self.columns.name
1717-
return result
1710+
if self.columns.is_unique:
1711+
indexer = self.columns.get_indexer(key)
1712+
mask = indexer == -1
1713+
if mask.any():
1714+
raise KeyError("No column(s) named: %s" % str(key[mask]))
1715+
result = self.reindex(columns=key)
1716+
if result.columns.name is None:
1717+
result.columns.name = self.columns.name
1718+
return result
1719+
else:
1720+
mask = self.columns.isin(key)
1721+
return self.take(mask.nonzero()[0], axis=1)
17181722

17191723
def _slice(self, slobj, axis=0):
17201724
if axis == 0:

pandas/tests/test_frame.py

+11
Original file line number | Diff line number | Diff line change
@@ -1076,6 +1076,17 @@ def test_getitem_ix_boolean_duplicates_multiple(self):
10761076
exp = df[df[0] > 0]
10771077
assert_frame_equal(result, exp)
10781078

1079+
def test_getitem_list_duplicates(self):
1080+
# #1943
1081+
df = DataFrame(np.random.randn(4,4), columns=list('AABC'))
1082+
df.columns.name = 'foo'
1083+
1084+
result = df[['B', 'C']]
1085+
self.assert_(result.columns.name == 'foo')
1086+
1087+
expected = df.ix[:, 2:]
1088+
assert_frame_equal(result, expected)
1089+
10791090
def test_get_value(self):
10801091
for idx in self.frame.index:
10811092
for col in self.frame.columns:

0 commit comments

Comments
 (0)