From cc18b405eefa0b6a8eee45c537ea37a7fc30d69d Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 14 Jan 2021 01:01:48 +0100 Subject: [PATCH 1/3] BUG: Fix regression in loc setitem raising KeyError when enlarging df with multiindex --- doc/source/whatsnew/v1.2.1.rst | 2 +- pandas/core/indexing.py | 4 +++- pandas/tests/indexing/multiindex/test_loc.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 1c8db4dd32393..55fddb8b732e2 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -29,7 +29,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`) - Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`) - Fixed regression in :meth:`DataFrame.groupby` when aggregating an :class:`ExtensionDType` that could fail for non-numeric values (:issue:`38980`) -- +- Fixed regression in :meth:`DataFrame.loc.__setitem__` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ac0955c6c3e4c..d4acca4192435 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -662,7 +662,9 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): if self.ndim != 2: return - if isinstance(key, tuple) and not isinstance(self.obj.index, ABCMultiIndex): + if isinstance(key, tuple) and ( + isinstance(key[0], slice) or not isinstance(self.obj.index, ABCMultiIndex) + ): # key may be a tuple if we are .loc # if index is not a MultiIndex, set key to column part key = key[column_axis] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index ba01caba544f4..15f682b3b98ac 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -308,6 +308,18 @@ def test_multiindex_one_dimensional_tuple_columns(self, indexer): expected = DataFrame([0, 2], index=mi) tm.assert_frame_equal(obj, expected) + def test_multiindex_setitem_columns_enlarging(self): + # GH#39147 + mi = MultiIndex.from_tuples([(1, 2), (3, 4)]) + df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"]) + df.loc[:, ["c", "d"]] = None + expected = DataFrame( + [[1, 2, None, None], [3, 4, None, None]], + index=mi, + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize( "indexer, pos", From 6080faad3a840cef0a4991818282e4de180d3d18 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 14 Jan 2021 21:47:17 +0100 Subject: [PATCH 2/3] Change bugfix and add test --- pandas/core/indexing.py | 7 +++++-- pandas/tests/indexing/multiindex/test_loc.py | 9 ++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d4acca4192435..4ac987bb75728 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -663,10 +663,13 @@ def 
_ensure_listlike_indexer(self, key, axis=None, value=None): return if isinstance(key, tuple) and ( - isinstance(key[0], slice) or not isinstance(self.obj.index, ABCMultiIndex) + not isinstance(self.obj.index, ABCMultiIndex) + or len(key) == 2 + and not isinstance(key[1], slice) ): # key may be a tuple if we are .loc - # if index is not a MultiIndex, set key to column part + # if index is not a MultiIndex or second element is not a slice, + # set key to column part key = key[column_axis] axis = column_axis diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 15f682b3b98ac..402668321600b 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -308,13 +308,16 @@ def test_multiindex_one_dimensional_tuple_columns(self, indexer): expected = DataFrame([0, 2], index=mi) tm.assert_frame_equal(obj, expected) - def test_multiindex_setitem_columns_enlarging(self): + @pytest.mark.parametrize( + "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)] + ) + def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value): # GH#39147 mi = MultiIndex.from_tuples([(1, 2), (3, 4)]) df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"]) - df.loc[:, ["c", "d"]] = None + df.loc[indexer, ["c", "d"]] = 1.0 expected = DataFrame( - [[1, 2, None, None], [3, 4, None, None]], + [[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]], index=mi, columns=["a", "b", "c", "d"], ) From a4013b96ee5caaf2e7d2ef30b044ae458dddd3df Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 14 Jan 2021 22:07:31 +0100 Subject: [PATCH 3/3] Simplify condition --- pandas/core/indexing.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4ac987bb75728..8cd1b23b75edd 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -662,14 +662,9 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): if 
self.ndim != 2: return - if isinstance(key, tuple) and ( - not isinstance(self.obj.index, ABCMultiIndex) - or len(key) == 2 - and not isinstance(key[1], slice) - ): + if isinstance(key, tuple) and len(key) > 1: # key may be a tuple if we are .loc - # if index is not a MultiIndex or second element is not a slice, - # set key to column part + # if length of key is > 1 set key to column part key = key[column_axis] axis = column_axis