Skip to content

Commit 9ea0d44

Browse files
committed
Merge pull request #4622 from jreback/ix_issue
BUG: Fix selection with ``ix/loc`` and non_unique selectors (GH4619)
2 parents da85a3c + d9db022 commit 9ea0d44

File tree

3 files changed

+42
-12
lines changed

3 files changed

+42
-12
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
264264
to a possible lazy frequency inference issue (:issue:`3317`)
265265
- Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly
266266
(causing the original stack trace to be truncated).
267+
- Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`)
267268

268269
pandas 0.12
269270
===========

pandas/core/indexing.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,12 @@ def _reindex(keys, level=None):
476476
else:
477477
level = None
478478

479-
if labels.is_unique and Index(keyarr).is_unique:
479+
keyarr_is_unique = Index(keyarr).is_unique
480+
481+
# existing labels are unique and indexer is unique
482+
if labels.is_unique and keyarr_is_unique:
480483
return _reindex(keyarr, level=level)
484+
481485
else:
482486
indexer, missing = labels.get_indexer_non_unique(keyarr)
483487
check = indexer != -1
@@ -496,8 +500,15 @@ def _reindex(keys, level=None):
496500
new_labels = np.empty(tuple([len(indexer)]),dtype=object)
497501
new_labels[cur_indexer] = cur_labels
498502
new_labels[missing_indexer] = missing_labels
499-
new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values
500-
new_indexer[missing_indexer] = -1
503+
504+
# a unique indexer
505+
if keyarr_is_unique:
506+
new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values
507+
new_indexer[missing_indexer] = -1
508+
509+
# we have a non_unique selector, need to use the original indexer here
510+
else:
511+
new_indexer = indexer
501512

502513
# reindex with the specified axis
503514
ndim = self.obj.ndim

pandas/tests/test_indexing.py

+27-9
Original file line numberDiff line numberDiff line change
@@ -796,27 +796,45 @@ def test_dups_fancy_indexing(self):
796796
assert_frame_equal(df,result)
797797

798798
# GH 3561, dups not in selected order
799-
ind = ['A', 'A', 'B', 'C']
800-
df = DataFrame({'test':lrange(len(ind))}, index=ind)
799+
df = DataFrame({'test': [5,7,9,11]}, index=['A', 'A', 'B', 'C'])
801800
rows = ['C', 'B']
802-
res = df.ix[rows]
803-
self.assert_(rows == list(res.index))
801+
expected = DataFrame({'test' : [11,9]},index=rows)
802+
result = df.ix[rows]
803+
assert_frame_equal(result, expected)
804804

805-
res = df.ix[Index(rows)]
806-
self.assert_(Index(rows).equals(res.index))
805+
result = df.ix[Index(rows)]
806+
assert_frame_equal(result, expected)
807807

808808
rows = ['C','B','E']
809-
res = df.ix[rows]
810-
self.assert_(rows == list(res.index))
809+
expected = DataFrame({'test' : [11,9,np.nan]},index=rows)
810+
result = df.ix[rows]
811+
assert_frame_equal(result, expected)
811812

812-
# inconcistent returns for unique/duplicate indices when values are missing
813+
# inconsistent returns for unique/duplicate indices when values are missing
813814
df = DataFrame(randn(4,3),index=list('ABCD'))
814815
expected = df.ix[['E']]
815816

816817
dfnu = DataFrame(randn(5,3),index=list('AABCD'))
817818
result = dfnu.ix[['E']]
818819
assert_frame_equal(result, expected)
819820

821+
# GH 4619; duplicate indexer with missing label
822+
df = DataFrame({"A": [0, 1, 2]})
823+
result = df.ix[[0,8,0]]
824+
expected = DataFrame({"A": [0, np.nan, 0]},index=[0,8,0])
825+
assert_frame_equal(result,expected)
826+
827+
df = DataFrame({"A": list('abc')})
828+
result = df.ix[[0,8,0]]
829+
expected = DataFrame({"A": ['a', np.nan, 'a']},index=[0,8,0])
830+
assert_frame_equal(result,expected)
831+
832+
# non unique with non unique selector
833+
df = DataFrame({'test': [5,7,9,11]}, index=['A','A','B','C'])
834+
expected = DataFrame({'test' : [5,7,5,7,np.nan]},index=['A','A','A','A','E'])
835+
result = df.ix[['A','A','E']]
836+
assert_frame_equal(result, expected)
837+
820838
def test_indexing_mixed_frame_bug(self):
821839

822840
# GH3492

0 commit comments

Comments
 (0)