From 1b7dc81235db94bbedab276296731954e1a742a2 Mon Sep 17 00:00:00 2001 From: anzor Date: Tue, 15 Oct 2024 22:59:39 +0300 Subject: [PATCH 01/11] Fix issue #59333 --- pandas/core/indexing.py | 6 +++--- pandas/tests/indexing/test_loc.py | 33 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 08bd3cde60806..675e5145c5856 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2353,7 +2353,6 @@ def ravel(i): return i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) - aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 @@ -2371,7 +2370,9 @@ def ravel(i): # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) - if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer): + if len(indexer) == self.ndim and all( + is_sequence(_) or isinstance(_, slice) for _ in indexer + ): ser_values = ser.reindex(obj.axes[0][indexer[0]])._values # single indexer @@ -2399,7 +2400,6 @@ def ravel(i): if using_cow: return ser return ser._values.copy() - return ser.reindex(new_ix)._values # 2 dims diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 36b08ee1df790..643a5e126bb34 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3297,3 +3297,36 @@ def test_loc_reindexing_of_empty_index(self): df.loc[Series([False] * 4, index=df.index, name=0), 0] = df[0] expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) tm.assert_frame_equal(df, expected) + + def test_loc_set_series_to_multiple_columns(self): + # GH 59933 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) + df.loc[0:3, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) + df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[df["A"] > 0, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[0:4, ["A", "B", "C"]] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) From bab14ffb49cbac89248ff077321a6a22b36d4bae Mon Sep 17 00:00:00 2001 From: anzor Date: Wed, 16 Oct 2024 10:00:10 +0300 Subject: [PATCH 02/11] remove condition --- pandas/core/indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 675e5145c5856..a7323ceb13da6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2353,6 +2353,7 @@ def ravel(i): return i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) + aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 @@ -2370,9 +2371,7 @@ def ravel(i): # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) - if len(indexer) == self.ndim and all( - is_sequence(_) or isinstance(_, slice) for _ in indexer - ): + if all(is_sequence(_) or isinstance(_, slice) for _ in indexer): ser_values = ser.reindex(obj.axes[0][indexer[0]])._values # single indexer @@ -2400,6 +2399,7 @@ def ravel(i): if using_cow: return ser return ser._values.copy() + return ser.reindex(new_ix)._values # 2 dims From 19a149c3d5847b4222ea470ff03e142eebf0446d Mon Sep 17 00:00:00 2001 From: anzor Date: Wed, 23 Oct 2024 16:59:24 +0300 Subject: [PATCH 03/11] fix case with two slices, test added --- pandas/core/indexing.py | 5 ++++- pandas/tests/indexing/test_loc.py | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a7323ceb13da6..4efcb1ecc03de 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2376,7 +2376,10 @@ def ravel(i): # single indexer if len(indexer) > 1 and not multiindex_indexer: - len_indexer = len(indexer[1]) + if isinstance(indexer[1], slice): + len_indexer = len(obj.loc[indexer[1]].axes[1]) + else: + len_indexer = len(indexer[1]) ser_values = ( np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 643a5e126bb34..b50f5c96b544d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3330,3 +3330,10 @@ def test_loc_set_series_to_multiple_columns(self): [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ) tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) + df.loc[:, "A":"C"] = Series([10, 20, 30]) + expected = DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ) + tm.assert_frame_equal(df, expected) From 27f1122bc0a0e42ba0279b02bc24707ffad99245 Mon Sep 17 00:00:00 2001 From: anzor Date: Thu, 31 Oct 2024 00:11:00 +0300 Subject: [PATCH 04/11] simpler getting len_indexer (without .loc) --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4efcb1ecc03de..44b3e623e398e 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2377,7 +2377,7 @@ def ravel(i): # single indexer if len(indexer) > 1 and not multiindex_indexer: if isinstance(indexer[1], slice): - len_indexer = len(obj.loc[indexer[1]].axes[1]) + len_indexer = len(obj.axes[1][indexer[1]]) else: len_indexer = len(indexer[1]) ser_values = ( From c7a149c4437b952fb3b07d6ce79789eac21467dc Mon Sep 17 00:00:00 2001 From: anzor Date: Sun, 17 Nov 2024 21:07:19 +0300 Subject: [PATCH 05/11] Used @pytest.mark.parametrize in tests, added whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/indexing/test_loc.py | 89 ++++++++++++++++++------------- 2 files changed, 52 insertions(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7da2f968b900b..93de6807afada 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -665,7 +665,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) -- +- Fixed bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`599333`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 032cd6c01e441..f11f79960dcb1 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3298,42 +3298,55 @@ def test_loc_reindexing_of_empty_index(self): expected = DataFrame(index=[1, 1, 2, 2], data=["1", "1", "2", "2"]) tm.assert_frame_equal(df, expected) - def test_loc_set_series_to_multiple_columns(self): + @pytest.mark.parametrize( + "df, row_index, col_index, setting_series, expected_df", + [ + [ + DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), + slice(0, 3), + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), + slice(None), + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + [True, True, True], + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + slice(0, 4), + ["A", "B", "C"], + Series([10, 20, 30]), + DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ), + ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + slice(None), + slice("A", "C"), + Series([10, 20, 30]), + DataFrame( + [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") + ), + ], + ], + ) + def test_loc_set_series_to_multiple_columns( + self, df, row_index, col_index, setting_series, expected_df + ): # GH 59933 - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) - df.loc[0:3, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")) - df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[df["A"] > 0, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[:, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[0:4, ["A", "B", "C"]] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) - - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")) - df.loc[:, "A":"C"] = Series([10, 20, 30]) - expected = DataFrame( - [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") - ) - tm.assert_frame_equal(df, expected) + df.loc[row_index, col_index] = setting_series + tm.assert_frame_equal(df, expected_df) From fdc09049a07e30658a27ace837c85a552e9f7cb2 Mon Sep 17 00:00:00 2001 From: anzor Date: Sun, 17 Nov 2024 21:19:44 +0300 Subject: [PATCH 06/11] Remove unnecessary parameter --- pandas/tests/indexing/test_loc.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f11f79960dcb1..f9a67ae3b0d60 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3299,27 +3299,24 @@ def test_loc_reindexing_of_empty_index(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( - "df, row_index, col_index, setting_series, expected_df", + "df, row_index, col_index, expected_df", [ [ DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), slice(0, 3), ["A", "B", "C"], - Series([10, 20, 30]), DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), ], [ DataFrame([[1, 2, 3], [4, 5, 6]], columns=list("ABC")), slice(None), ["A", "B", "C"], - Series([10, 20, 30]), DataFrame([[10, 10, 10], [20, 20, 20]], columns=list("ABC")), ], [ DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), [True, True, True], ["A", "B", "C"], - Series([10, 20, 30]), DataFrame( [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), @@ -3328,7 +3325,6 @@ def test_loc_reindexing_of_empty_index(self): DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), slice(0, 4), ["A", "B", "C"], - Series([10, 20, 30]), DataFrame( [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), @@ -3337,7 +3333,6 @@ def test_loc_reindexing_of_empty_index(self): DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), slice(None), slice("A", "C"), - Series([10, 20, 30]), DataFrame( [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), @@ -3345,8 +3340,8 @@ def test_loc_reindexing_of_empty_index(self): ], ) def test_loc_set_series_to_multiple_columns( - self, df, row_index, col_index, setting_series, expected_df + self, df, row_index, col_index, expected_df ): # GH 59933 - df.loc[row_index, col_index] = setting_series + df.loc[row_index, col_index] = Series([10, 20, 30]) tm.assert_frame_equal(df, expected_df) From 12d38eb2eff5560e9af236695e388ceb43e907f0 Mon Sep 17 00:00:00 2001 From: anzor Date: Mon, 30 Dec 2024 00:18:00 +0300 Subject: [PATCH 07/11] fix issue with indexing by boolean series --- pandas/core/indexing.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0cec8e63fdbb0..9d1cf69722600 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2353,11 +2353,17 @@ def _align_series( if isinstance(indexer, tuple): # flatten np.ndarray indexers + if ( + len(indexer) == 2 + and isinstance(indexer[1], np.ndarray) + and indexer[1].dtype == np.bool_ + ): + indexer = (indexer[0], np.where(indexer[1])[0]) + def ravel(i): return i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) - aligners = [not com.is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 From 163dfe31839f969274f3da5017742b55ea7b6ff5 Mon Sep 17 00:00:00 2001 From: anzor Date: Mon, 30 Dec 2024 00:25:45 +0300 Subject: [PATCH 08/11] add unit-test --- .gitignore | 2 ++ pandas/tests/indexing/test_loc.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/.gitignore b/.gitignore index a188e216d9f70..cdbf1384c96bd 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ .vagrant .noseids .ipynb_checkpoints +.ipynb +.csv .tags .cache/ .vscode/ diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 94b5f47f5ca33..d7781688ae414 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3337,6 +3337,18 @@ def test_loc_reindexing_of_empty_index(self): [[10, 10, 10], [20, 20, 20], [30, 30, 30]], columns=list("ABC") ), ], + [ + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=list("ABC")), + slice(None), + Series( + { + "A": True, + "C": False, + "B": True, + } + ), + DataFrame([[10, 10, 3], [20, 20, 6], [30, 30, 9]], columns=list("ABC")), + ], ], ) def test_loc_set_series_to_multiple_columns( From e2f39e44508a82a7cf9bc0cae1154e9d7d0a00c3 Mon Sep 17 00:00:00 2001 From: anzor Date: Mon, 30 Dec 2024 17:23:04 +0300 Subject: [PATCH 09/11] remove ipynb and csv from gitignore --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index cdbf1384c96bd..a188e216d9f70 100644 --- a/.gitignore +++ b/.gitignore @@ -20,8 +20,6 @@ .vagrant .noseids .ipynb_checkpoints -.ipynb -.csv .tags .cache/ .vscode/ From 5bb12fb7ae54e9b565563987ce5697d8820610f5 Mon Sep 17 00:00:00 2001 From: anzber <5594175+anzber@users.noreply.github.com> Date: Sat, 22 Feb 2025 19:31:13 +0300 Subject: [PATCH 10/11] fix issue number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1ec32a5164456..ca3239461ded3 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -690,7 +690,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) -- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`599333`) +- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59333`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) From d2c8ab8dde4583db861f1eab8950b395cd840c25 Mon Sep 17 00:00:00 2001 From: anzber <5594175+anzber@users.noreply.github.com> Date: Mon, 24 Feb 2025 07:55:20 +0300 Subject: [PATCH 11/11] Update v3.0.0.rst fix issue number again --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ca3239461ded3..0bd68a539a2ba 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -690,7 +690,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) -- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59333`) +- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`) - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)