From 3c4ea6adeeaf04971725731aae77a7780ed3730a Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 9 Jan 2021 22:10:49 +0100 Subject: [PATCH 1/3] Regression in loc.setitem raising ValueError with unordered MultiIndex columns and scalar indexer --- doc/source/whatsnew/v1.2.1.rst | 1 + pandas/core/indexing.py | 6 +++++- pandas/tests/frame/indexing/test_indexing.py | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 4b7a4180ee9f9..16444b4103d2f 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -22,6 +22,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`) - Fixed regression in :meth:`DataFrame.__setitem__` raising ``ValueError`` when expanding :class:`DataFrame` and new column is from type ``"0 - name"`` (:issue:`39010`) - Fixed regression in :meth:`.GroupBy.sem` where the presence of non-numeric columns would cause an error instead of being dropped (:issue:`38774`) +- Fixed regression in :meth:`DataFrame.loc.__setitem__` raising ``ValueError`` when :class:`DataFrame` has unsorted :class:`MultiIndex` columns and indexer is a scalar (:issue:`38601`) - Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`) - Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings. This resulted in a regression in some cases as the default for ``float_precision`` was changed in pandas 1.2.0 (:issue:`38753`) - Fixed regression in :meth:`Rolling.skew` and :meth:`Rolling.kurt` modifying the object inplace (:issue:`38908`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 12694c19b2173..2160ad50219fa 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -15,6 +15,7 @@ from pandas.core.dtypes.common import ( is_array_like, + is_bool_dtype, is_hashable, is_integer, is_iterator, @@ -1933,12 +1934,15 @@ def _ensure_iterable_column_indexer(self, column_indexer): """ Ensure that our column indexer is something that can be iterated over. """ - # Ensure we have something we can iterate over if is_integer(column_indexer): ilocs = [column_indexer] elif isinstance(column_indexer, slice): ri = Index(range(len(self.obj.columns))) ilocs = ri[column_indexer] + elif isinstance(column_indexer, np.ndarray) and is_bool_dtype( + column_indexer.dtype + ): + ilocs = np.arange(len(column_indexer))[column_indexer] else: ilocs = column_indexer return ilocs diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4cbdf61ff8dae..f450ed93f08ce 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1685,6 +1685,15 @@ def test_getitem_interval_index_partial_indexing(self): res = df.loc[:, 0.5] tm.assert_series_equal(res, expected) + @pytest.mark.parametrize("indexer", ["A", ["A"]]) + def test_setitem_unsorted_multiindex_columns(self, indexer): + # GH#38601 + mi = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")]) + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) + df.loc[:, indexer] = np.zeros((2, 2), dtype=int) + expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=mi) + tm.assert_frame_equal(df, expected) + class TestDataFrameIndexingUInt64: def test_setitem(self, uint64_frame): From d1fef9184ef7465f3f552b0ec8edb182d4822a4e Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 10 Jan 2021 00:26:44 +0100 Subject: [PATCH 2/3] Add tests --- pandas/tests/frame/indexing/test_indexing.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index f450ed93f08ce..6808ffe65e561 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1685,13 +1685,19 @@ def test_getitem_interval_index_partial_indexing(self): res = df.loc[:, 0.5] tm.assert_series_equal(res, expected) - @pytest.mark.parametrize("indexer", ["A", ["A"]]) + @pytest.mark.parametrize("indexer", ["A", ["A"], ("A", slice(None))]) def test_setitem_unsorted_multiindex_columns(self, indexer): # GH#38601 mi = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")]) df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi) - df.loc[:, indexer] = np.zeros((2, 2), dtype=int) + obj = df.copy() + obj.loc[:, indexer] = np.zeros((2, 2), dtype=int) expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=mi) + tm.assert_frame_equal(obj, expected) + + df = df.sort_index(1) + df.loc[:, indexer] = np.zeros((2, 2), dtype=int) + expected = expected.sort_index(1) tm.assert_frame_equal(df, expected) From 9cde8f46c7f0d0ac4c897bf9fe18c97bee5c5e47 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 10 Jan 2021 00:51:01 +0100 Subject: [PATCH 3/3] Improve performance --- pandas/core/indexing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2160ad50219fa..a1b06f3c9d6a1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1937,8 +1937,7 @@ def _ensure_iterable_column_indexer(self, column_indexer): if is_integer(column_indexer): ilocs = [column_indexer] elif isinstance(column_indexer, slice): - ri = Index(range(len(self.obj.columns))) - ilocs = ri[column_indexer] + ilocs = np.arange(len(self.obj.columns))[column_indexer] elif isinstance(column_indexer, np.ndarray) and is_bool_dtype( column_indexer.dtype ):