From bef2819eb74ddca1f74206fb09d549d6c79b8814 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 29 Nov 2020 15:20:39 +0100 Subject: [PATCH 01/10] Fix dropping of levels in multiindex --- pandas/core/indexing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6aa031af64833..b5591af9c643e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -838,8 +838,9 @@ def _getitem_nested_tuple(self, tup: Tuple): if self.name != "loc": # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") - with suppress(IndexingError): - return self._handle_lowerdim_multi_index_axis0(tup) + if len(self.obj) > 1 or not any(isinstance(x, slice) for x in tup): + with suppress(IndexingError): + return self._handle_lowerdim_multi_index_axis0(tup) # this is a series with a multi-index specified a tuple of # selectors From 74084640b205296b3eb5bdea729a2db8f31fa7bc Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 29 Nov 2020 16:50:18 +0100 Subject: [PATCH 02/10] Add test and whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/tests/indexing/multiindex/test_loc.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5d36c52da9f0d..a2b37b8ad32fd 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -634,6 +634,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`) - Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` raising ``KeyError`` when columns were :class:`MultiIndex` with only one level (:issue:`29749`) - Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) +- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 42525fc575397..489cc9ef54973 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -695,3 +695,13 @@ def test_loc_getitem_index_differently_ordered_slice_none(): columns=["a", "b"], ) tm.assert_frame_equal(result, expected) + + +def test_loc_getitem_drops_levels_for_one_row_dataframe(): + # GH#10521 + df = DataFrame({"a": ["a"], "b": ["b"], "c": ["a"], "d": 0}).set_index( + ["a", "b", "c"] + ) + expected = df.copy() + result = df.loc["a", :, "a"] + tm.assert_frame_equal(result, expected) From da86c93215f069e75193249c021045a2fe7b8a33 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 29 Nov 2020 17:00:51 +0100 Subject: [PATCH 03/10] Add note --- pandas/core/indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b5591af9c643e..cb4198e568331 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -839,6 +839,7 @@ def _getitem_nested_tuple(self, tup: Tuple): # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") if len(self.obj) > 1 or not any(isinstance(x, slice) for x in tup): + # GH#10521 IndexingError is not raised for slices for objs with one row with suppress(IndexingError): return self._handle_lowerdim_multi_index_axis0(tup) From 3f54db9b753dde9341763b49ca439d31abdeaff9 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 30 Nov 2020 23:10:12 +0100 Subject: [PATCH 04/10] Simplify test --- pandas/tests/indexing/multiindex/test_loc.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 489cc9ef54973..732799fe3e425 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -699,9 +699,8 @@ def test_loc_getitem_index_differently_ordered_slice_none(): def test_loc_getitem_drops_levels_for_one_row_dataframe(): # GH#10521 - df = DataFrame({"a": ["a"], "b": ["b"], "c": ["a"], "d": 0}).set_index( - ["a", "b", "c"] - ) + mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"]) + df = DataFrame({"d": [0]}, index=mi) expected = df.copy() - result = df.loc["a", :, "a"] + result = df.loc["x", :, "z"] tm.assert_frame_equal(result, expected) From 7a1c22ad2d5e2865f2f2ee17fc4999a9ec5840d8 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 22 Dec 2020 23:00:07 +0100 Subject: [PATCH 05/10] Move whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index fbd2c2b5345fc..af19cfeede210 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -219,7 +219,7 @@ Indexing - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`) - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` with mixed dtypes when setting with a dictionary value (:issue:`38335`) -- +- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`) - Missing From 8517274dbb3c0c193d3322250f80e09fdc16797c Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 24 Dec 2020 00:45:15 +0100 Subject: [PATCH 06/10] Remove len --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c48928d4635e6..f97de1e021838 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -842,7 +842,7 @@ def _getitem_nested_tuple(self, tup: Tuple): if self.name != "loc": # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") - if len(self.obj) > 1 or not any(isinstance(x, slice) for x in tup): + if not any(isinstance(x, slice) for x in tup): # GH#10521 IndexingError is not raised for slices for objs with one row with suppress(IndexingError): return self._handle_lowerdim_multi_index_axis0(tup) From ee106a9c1c2e4bd60a60c7bc715fc3bdf7b1135f Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 24 Dec 2020 11:35:25 +0100 Subject: [PATCH 07/10] Revert --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f97de1e021838..c48928d4635e6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -842,7 +842,7 @@ def _getitem_nested_tuple(self, tup: Tuple): if self.name != "loc": # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") - if not any(isinstance(x, slice) for x in tup): + if len(self.obj) > 1 or not any(isinstance(x, slice) for x in tup): # GH#10521 IndexingError is not raised for slices for objs with one row with suppress(IndexingError): return self._handle_lowerdim_multi_index_axis0(tup) From 693038f8a45d3b339959553f6d93fbfd5e4be167 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 29 Dec 2020 21:33:59 +0100 Subject: [PATCH 08/10] Add test and fix bug --- pandas/core/indexing.py | 3 ++- pandas/tests/indexing/multiindex/test_loc.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c48928d4635e6..ba3dbaceb8d3a 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -842,8 +842,9 @@ def _getitem_nested_tuple(self, tup: Tuple): if self.name != "loc": # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") - if len(self.obj) > 1 or not any(isinstance(x, slice) for x in tup): + if isinstance(self.obj, ABCSeries) or not any(isinstance(x, slice) for x in tup): # GH#10521 IndexingError is not raised for slices for objs with one row + # so we have to catch this case here with suppress(IndexingError): return self._handle_lowerdim_multi_index_axis0(tup) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 732799fe3e425..37153bef8d77b 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -704,3 +704,8 @@ def test_loc_getitem_drops_levels_for_one_row_dataframe(): expected = df.copy() result = df.loc["x", :, "z"] tm.assert_frame_equal(result, expected) + + ser = Series([0], index=mi) + result = ser.loc["x", :, "z"] + expected = Series([0], index=Index(["y"], name="b")) + tm.assert_series_equal(result, expected) From 68987cd4458ed903e6a72ba58f64dfe953cde2e9 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 29 Dec 2020 21:35:51 +0100 Subject: [PATCH 09/10] Add comment --- pandas/core/indexing.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ba3dbaceb8d3a..a27eec4cdbe1c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -842,9 +842,12 @@ def _getitem_nested_tuple(self, tup: Tuple): if self.name != "loc": # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") - if isinstance(self.obj, ABCSeries) or not any(isinstance(x, slice) for x in tup): - # GH#10521 IndexingError is not raised for slices for objs with one row - # so we have to catch this case here + if isinstance(self.obj, ABCSeries) or not any( + isinstance(x, slice) for x in tup + ): + # GH#10521 Series should reduce MultiIndex dimensions instead of + # DataFrame, IndexingError is not raised when slice(None,None,None) + # with one row. with suppress(IndexingError): return self._handle_lowerdim_multi_index_axis0(tup) From 04bfc9b69492f507ea7e7d75199e9898b99f3bf1 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 30 Dec 2020 10:11:12 +0100 Subject: [PATCH 10/10] Change validation --- pandas/core/indexing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a27eec4cdbe1c..60b526426d413 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -842,9 +842,7 @@ def _getitem_nested_tuple(self, tup: Tuple): if self.name != "loc": # This should never be reached, but lets be explicit about it raise ValueError("Too many indices") - if isinstance(self.obj, ABCSeries) or not any( - isinstance(x, slice) for x in tup - ): + if self.ndim == 1 or not any(isinstance(x, slice) for x in tup): # GH#10521 Series should reduce MultiIndex dimensions instead of # DataFrame, IndexingError is not raised when slice(None,None,None) # with one row.