From 5003eaa456e522b9d185656090ee01b462cc180d Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Tue, 4 Mar 2025 23:20:52 +0900 Subject: [PATCH 1/9] Fix loc dtype Co-authored-by: Parthi --- pandas/core/indexing.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index bcb27d0320c91..ada32e17e6b61 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1094,7 +1094,16 @@ def _getitem_lowerdim(self, tup: tuple): if com.is_null_slice(new_key): return section # This is an elided recursive call to iloc/loc - return getattr(section, self.name)[new_key] + out = getattr(section, self.name)[new_key] + # Re-interpret dtype of out.values for loc/iloc[int, list/slice]. + # GH60600 + if ( + i == 0 + and isinstance(key, int) + and isinstance(new_key, (list, slice)) + ): + out = out.infer_objects() + return out raise IndexingError("not applicable") From d4a031b9e1ca09b7552d7534c55ff45103789ad2 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Tue, 4 Mar 2025 23:30:46 +0900 Subject: [PATCH 2/9] Modify test_loc_setitem_frame_mixed_labels accordingly --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8838fc7eed2f7..41fb091ab25ab 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -807,7 +807,7 @@ def test_loc_setitem_frame_mixed_labels(self): result = df.loc[0, [1, 2]] expected = Series( - [1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0 + [1, 3], index=Index([1, 2], dtype=object), dtype="int64", name=0 ) tm.assert_series_equal(result, expected) From 2536652fecd77770b740564a29245a7e6d7496a8 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Tue, 4 Mar 2025 23:34:20 +0900 Subject: [PATCH 3/9] Add test for the linked issue --- pandas/tests/indexing/test_loc.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 41fb091ab25ab..65fa524c953b5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -60,6 +60,14 @@ def test_not_change_nan_loc(series, new_series, expected_ser): tm.assert_frame_equal(df.notna(), ~expected) +def test_loc_dtype(): + # GH 60600 + df = DataFrame([["a", 1.0, 2.0], ["b", 3.0, 4.0]]) + result = df.loc[0, [1, 2]] + expected = df[[1, 2]].loc[0] + tm.assert_frame_equal(result, expected) + + class TestLoc: def test_none_values_on_string_columns(self, using_infer_string): # Issue #32218 From c5a5de160728684a3583a2afb3e8bc64ab8e7bde Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Wed, 5 Mar 2025 00:11:09 +0900 Subject: [PATCH 4/9] Fix test_loc_dtype --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 65fa524c953b5..300a6cbf9c3a8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -65,7 +65,7 @@ def test_loc_dtype(): df = DataFrame([["a", 1.0, 2.0], ["b", 3.0, 4.0]]) result = df.loc[0, [1, 2]] expected = df[[1, 2]].loc[0] - tm.assert_frame_equal(result, expected) + tm.assert_series_equal(result, expected) class TestLoc: From 1f529ce0dbc2b36fd8cf657b47470ec158df2ffa Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 6 Mar 2025 20:56:01 +0900 Subject: [PATCH 5/9] Fix dtype inference --- pandas/core/indexing.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ada32e17e6b61..3fe4d011ebb33 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -28,6 +28,7 @@ from pandas.core.dtypes.cast import ( can_hold_element, + find_common_type, maybe_promote, ) from pandas.core.dtypes.common import ( @@ -1095,14 +1096,12 @@ def _getitem_lowerdim(self, tup: tuple): return section # This is an elided recursive call to iloc/loc out = getattr(section, self.name)[new_key] - # Re-interpret dtype of out.values for loc/iloc[int, list/slice]. + # Re-interpret dtype of out.values for loc/iloc[int, list-like]. # GH60600 - if ( - i == 0 - and isinstance(key, int) - and isinstance(new_key, (list, slice)) - ): - out = out.infer_objects() + if i == 0 and isinstance(key, int) and is_list_like(tup[1]): + dt = self.obj.dtypes.__getitem__(tup[1]) + if len(dt) > 0: + out = out.astype(find_common_type(dt.tolist())) return out raise IndexingError("not applicable") From d2e29c0b538d88fd22c31eb27dc15aa0383e7e2e Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 13 Mar 2025 23:23:46 +0900 Subject: [PATCH 6/9] Reverse tuple to avoid unintended dtype inference --- pandas/core/indexing.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3fe4d011ebb33..4c2a91a0981f1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -28,7 +28,6 @@ from pandas.core.dtypes.cast import ( can_hold_element, - find_common_type, maybe_promote, ) from pandas.core.dtypes.common import ( @@ -1067,7 +1066,13 @@ def _getitem_lowerdim(self, tup: tuple): tup = self._validate_key_length(tup) - for i, key in enumerate(tup): + # Reverse tuple so that we are indexing along columns before rows + # and avoid unintended dtype inference. # GH60600 + if any(isinstance(ax, MultiIndex) for ax in self.obj.axes): + enum = enumerate(tup) + else: + enum = zip(range(len(tup) - 1, -1, -1), reversed(tup)) + for i, key in enum: if is_label_like(key): # We don't need to check for tuples here because those are # caught by the _is_nested_tuple_indexer check above. @@ -1095,14 +1100,7 @@ def _getitem_lowerdim(self, tup: tuple): if com.is_null_slice(new_key): return section # This is an elided recursive call to iloc/loc - out = getattr(section, self.name)[new_key] - # Re-interpret dtype of out.values for loc/iloc[int, list-like]. - # GH60600 - if i == 0 and isinstance(key, int) and is_list_like(tup[1]): - dt = self.obj.dtypes.__getitem__(tup[1]) - if len(dt) > 0: - out = out.astype(find_common_type(dt.tolist())) - return out + return getattr(section, self.name)[new_key] raise IndexingError("not applicable") From 74eb3564e9a14caab0c9a5a8d2cf9bd8fb1022ec Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Mon, 17 Mar 2025 18:34:08 +0900 Subject: [PATCH 7/9] Reverse axis order --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4c2a91a0981f1..17ae8d6b8ea31 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1073,7 +1073,7 @@ def _getitem_lowerdim(self, tup: tuple): else: enum = zip(range(len(tup) - 1, -1, -1), reversed(tup)) for i, key in enum: - if is_label_like(key): + if is_label_like(key) or is_list_like(key): # We don't need to check for tuples here because those are # caught by the _is_nested_tuple_indexer check above. section = self._getitem_axis(key, axis=i) From 6388d933106fad412aa1aee2fa0440f0c8bd4352 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Wed, 2 Apr 2025 21:44:31 +0900 Subject: [PATCH 8/9] Reverse indexing order --- pandas/core/indexing.py | 6 +----- pandas/tests/indexing/multiindex/test_loc.py | 2 +- pandas/tests/indexing/test_loc.py | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 17ae8d6b8ea31..87de4cad7dd8c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1068,11 +1068,7 @@ def _getitem_lowerdim(self, tup: tuple): # Reverse tuple so that we are indexing along columns before rows # and avoid unintended dtype inference. # GH60600 - if any(isinstance(ax, MultiIndex) for ax in self.obj.axes): - enum = enumerate(tup) - else: - enum = zip(range(len(tup) - 1, -1, -1), reversed(tup)) - for i, key in enum: + for i, key in zip(range(len(tup) - 1, -1, -1), reversed(tup)): if is_label_like(key) or is_list_like(key): # We don't need to check for tuples here because those are # caught by the _is_nested_tuple_indexer check above. diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 1d3258ab18a61..70d71de66d3cc 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -757,7 +757,7 @@ def test_missing_keys_raises_keyerror(self): df = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"]) df2 = df.set_index(["A", "B"]) - with pytest.raises(KeyError, match="1"): + with pytest.raises(KeyError, match="6"): df2.loc[(1, 6)] def test_missing_key_raises_keyerror2(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 300a6cbf9c3a8..acae10bc4cd97 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -449,7 +449,7 @@ def test_loc_to_fail(self): msg = ( rf"\"None of \[Index\(\[1, 2\], dtype='{np.dtype(int)}'\)\] are " - r"in the \[index\]\"" + r"in the \[columns\]\"" ) with pytest.raises(KeyError, match=msg): df.loc[[1, 2], [1, 2]] From 3596f7644975b586d55c0f51c78dcfdb3f3943a3 Mon Sep 17 00:00:00 2001 From: sanggon6107 Date: Thu, 17 Apr 2025 20:18:13 +0900 Subject: [PATCH 9/9] Explicitly assign the expected result --- pandas/tests/indexing/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index acae10bc4cd97..ccb58aae2783f 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -64,7 +64,7 @@ def test_loc_dtype(): # GH 60600 df = DataFrame([["a", 1.0, 2.0], ["b", 3.0, 4.0]]) result = df.loc[0, [1, 2]] - expected = df[[1, 2]].loc[0] + expected = Series([1.0, 2.0], index=[1, 2], dtype=float, name=0) tm.assert_series_equal(result, expected)