diff --git a/doc/source/release.rst b/doc/source/release.rst index e164584674ae5..0ed1f39d72cb5 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -411,6 +411,7 @@ Bug Fixes - Fixed an issue related to ticklocs/ticklabels with log scale bar plots across different versions of matplotlib (:issue:`4789`) - Suppressed DeprecationWarning associated with internal calls issued by repr() (:issue:`4391`) + - Fixed an issue with a duplicate index and duplicate selector with ``.loc`` (:issue:`4825`) pandas 0.12.0 ------------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 72196fcdad38d..4f5e6623e1512 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -701,6 +701,11 @@ def _reindex(keys, level=None): new_labels[cur_indexer] = cur_labels new_labels[missing_indexer] = missing_labels + # reindex with the specified axis + ndim = self.obj.ndim + if axis+1 > ndim: + raise AssertionError("invalid indexing error with non-unique index") + # a unique indexer if keyarr_is_unique: new_indexer = (Index(cur_indexer) + Index(missing_indexer)).values @@ -708,12 +713,15 @@ def _reindex(keys, level=None): # we have a non_unique selector, need to use the original indexer here else: - new_indexer = indexer - # reindex with the specified axis - ndim = self.obj.ndim - if axis+1 > ndim: - raise AssertionError("invalid indexing error with non-unique index") + # need to retake to have the same size as the indexer + rindexer = indexer.values + rindexer[~check] = 0 + result = self.obj.take(rindexer, axis=axis, convert=False) + + # reset the new indexer to account for the new size + new_indexer = np.arange(len(result)) + new_indexer[~check] = -1 result = result._reindex_with_indexers({ axis : [ new_labels, new_indexer ] }, copy=True, allow_dups=True) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 4b17dd5ffd9db..0c862576b09a1 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1436,6 +1436,49 @@ def f(): p.loc[:,:,'C'] = Series([30,32],index=p_orig.items) assert_panel_equal(p,expected) + def test_series_partial_set(self): + # partial set with new index + # Regression from GH4825 + ser = Series([0.1, 0.2], index=[1, 2]) + + # loc + expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) + result = ser.loc[[3, 2, 3]] + assert_series_equal(result, expected) + + expected = Series([np.nan, np.nan, np.nan], index=[3, 3, 3]) + result = ser.loc[[3, 3, 3]] + assert_series_equal(result, expected) + + expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) + result = ser.loc[[2, 2, 3]] + assert_series_equal(result, expected) + + expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) + result = Series([0.1, 0.2, 0.3], index=[1,2,3]).loc[[3,4,4]] + assert_series_equal(result, expected) + + expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) + result = Series([0.1, 0.2, 0.3, 0.4], index=[1,2,3,4]).loc[[5,3,3]] + assert_series_equal(result, expected) + + expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) + result = Series([0.1, 0.2, 0.3, 0.4], index=[1,2,3,4]).loc[[5,4,4]] + assert_series_equal(result, expected) + + expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) + result = Series([0.1, 0.2, 0.3, 0.4], index=[4,5,6,7]).loc[[7,2,2]] + assert_series_equal(result, expected) + + expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) + result = Series([0.1, 0.2, 0.3, 0.4], index=[1,2,3,4]).loc[[4,5,5]] + assert_series_equal(result, expected) + + # iloc + expected = Series([0.2,0.2,0.1,0.1], index=[2,2,1,1]) + result = ser.iloc[[1,1,0,0]] + assert_series_equal(result, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],