diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 62091d7ff03ff..f0e4f66fa4567 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -855,3 +855,4 @@ Bug Fixes - Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`) - Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`) +- Bug in ``Index`` where the ``KeyError`` message displays the incorrect column when a requested column is not in the DataFrame and the columns contain duplicate values (:issue:`13822`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0cba8308c1c53..933ecd1b8de86 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1217,7 +1217,9 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): else: (indexer, missing) = labels.get_indexer_non_unique(objarr) - check = indexer + # 'indexer' has dupes, create 'check' using 'missing' + check = np.zeros_like(objarr) + check[missing] = -1 mask = check == -1 if mask.any(): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 44c7f2277293d..a96e4acfad89b 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1332,6 +1332,15 @@ def f(): self.assertEqual(result, 3) self.assertRaises(ValueError, lambda: df.at['a', 0]) + # GH 13822, incorrect error string with non-unique columns when missing + # column is accessed + df = DataFrame({'x': [1.], 'y': [2.], 'z': [3.]}) + df.columns = ['x', 'x', 'z'] + + # Check that we get the correct value in the KeyError + self.assertRaisesRegexp(KeyError, "\['y'\] not in index", + lambda: df[['x', 'y', 'z']]) + def test_loc_getitem_label_slice(self): # label slices (with ints)