Skip to content

Commit 4406d37

Browse files
committed
BUG: fix duplicate index indexing problems close #1201
1 parent 2c44f57 commit 4406d37

File tree

4 files changed

+30
-5
lines changed

4 files changed

+30
-5
lines changed

RELEASE.rst

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ pandas 0.8.1
7676
- Fix unhandled IndexError when indexing very large time series (#1562)
7777
- Fix DatetimeIndex intersection logic error with irregular indexes (#1551)
7878
- Fix unit test errors on Python 3 (#1550)
79+
- Fix .ix indexing bugs in duplicate DataFrame index (#1201)
7980

8081
pandas 0.8.0
8182
============

pandas/core/indexing.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,8 @@ def _reindex(keys, level=None):
275275

276276
if com._is_bool_indexer(key):
277277
key = _check_bool_indexer(labels, key)
278-
return _reindex(labels[np.asarray(key)])
278+
inds, = np.asarray(key, dtype=bool).nonzero()
279+
return self.obj.take(inds, axis=axis)
279280
else:
280281
if isinstance(key, Index):
281282
# want Index objects to pass through untouched
@@ -294,7 +295,11 @@ def _reindex(keys, level=None):
294295
else:
295296
level = None
296297

297-
return _reindex(keyarr, level=level)
298+
if labels.is_unique:
299+
return _reindex(keyarr, level=level)
300+
else:
301+
mask = labels.isin(keyarr)
302+
return self.obj.take(mask.nonzero()[0], axis=axis)
298303

299304
def _convert_to_indexer(self, obj, axis=0):
300305
"""

pandas/io/tests/test_yahoo.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
import unittest
88
import pandas.io.data as pd
99
import nose
10+
from pandas.util.testing import network
1011

1112
class TestYahoo(unittest.TestCase):
1213

14+
@network
1315
def test_yahoo(self):
14-
"""asserts that yahoo is minimally working and that it throws
15-
an exception when DataReader can't get a 200 response from
16-
yahoo """
16+
# asserts that yahoo is minimally working and that it throws
17+
# an exception when DataReader can't get a 200 response from
18+
# yahoo
1719
start = datetime(2010,1,1)
1820
end = datetime(2012,1,24)
1921
self.assertEquals(

pandas/tests/test_frame.py

+17
Original file line numberDiff line numberDiff line change
@@ -969,6 +969,23 @@ def test_getitem_setitem_ix_duplicates(self):
969969
expected = df.ix[3]
970970
assert_series_equal(result, expected)
971971

972+
def test_getitem_ix_boolean_duplicates_multiple(self):
973+
# #1201
974+
df = DataFrame(np.random.randn(5, 3),
975+
index=['foo', 'foo', 'bar', 'baz', 'bar'])
976+
977+
result = df.ix[['bar']]
978+
exp = df.ix[[2, 4]]
979+
assert_frame_equal(result, exp)
980+
981+
result = df.ix[df[1] > 0]
982+
exp = df[df[1] > 0]
983+
assert_frame_equal(result, exp)
984+
985+
result = df.ix[df[0] > 0]
986+
exp = df[df[0] > 0]
987+
assert_frame_equal(result, exp)
988+
972989
def test_get_value(self):
973990
for idx in self.frame.index:
974991
for col in self.frame.columns:

0 commit comments

Comments
 (0)