From 01358f6e6b3373e0dbc2967409990acd0dabce6d Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 21 Aug 2021 16:58:36 +0530 Subject: [PATCH 01/15] BUG: Outer/right merge with EA dtypes cast to object --- doc/source/whatsnew/v1.3.3.rst | 2 +- pandas/core/reshape/merge.py | 13 ++++--- pandas/tests/reshape/merge/test_merge.py | 43 ++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 1340188c3d609..02ca357ffd584 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -17,7 +17,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) -- +- Fixed regression in :meth:`merge` where columns with ``ExtensionDtype`` was cast to ``object`` in ``left`` and ``outer`` merge (:issue:`40073`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a3baf8ade9c2e..4d8d36f3b7846 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -70,6 +70,7 @@ Categorical, Index, MultiIndex, + Series, ) from pandas.core import groupby import pandas.core.algorithms as algos @@ -81,10 +82,7 @@ from pandas.core.sorting import is_int64_overflow_possible if TYPE_CHECKING: - from pandas import ( - DataFrame, - Series, - ) + from pandas import DataFrame from pandas.core.arrays import DatetimeArray @@ -904,17 +902,22 @@ def _maybe_add_join_keys( # error: Item "bool" of "Union[Any, bool]" has no attribute "all" if mask_left.all(): # type: ignore[union-attr] key_col = Index(rvals) + final_dtype = rvals.dtype # error: Item "bool" of "Union[Any, bool]" has no attribute "all" elif ( right_indexer is not None and mask_right.all() # type: ignore[union-attr] ): key_col = Index(lvals) + final_dtype = lvals.dtype else: key_col = Index(lvals).where(~mask_left, rvals) + final_dtype = lvals.dtype if result._is_label_reference(name): - result[name] = key_col + result[name] = Series( + key_col, dtype=final_dtype, index=result.index + ) elif result._is_level_reference(name): if isinstance(result.index, MultiIndex): key_col.name = name diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index cd07b3814d023..2d3b386c55b20 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -354,8 +354,8 @@ def test_merge_join_key_dtype_cast(self): df = merge(df1, df2, how="outer") # GH13169 - # this really should be bool - assert df["key"].dtype == "object" + # GH#40073 + assert df["key"].dtype == "bool" df1 = DataFrame({"val": [1]}) df2 = DataFrame({"val": [2]}) @@ -2487,3 +2487,42 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): df_2 = DataFrame(data=["X"], columns=["C"], index=[999]) with pytest.raises(MergeError, match="Can only pass argument"): merge(df_1, df_2, on=["C"], left_index=True) + + +@pytest.mark.parametrize( + "expected_data, how", + [ + ([1, 2], "outer"), + ([], "inner"), + ([2], "right"), + ([1], "left"), + ], +) +@pytest.mark.parametrize( + "dtype", ["Float64", "Float32", "Int64", "Int32", "UInt64", "UInt32"] +) +def test_merge_EA_dtype(dtype, how, expected_data): + # GH#40073 + d1 = DataFrame([(1,)], columns=["id"], dtype=dtype) + d2 = DataFrame([(2,)], columns=["id"], dtype=dtype) + result = merge(d1, d2, how=how) + expected = DataFrame(expected_data, columns=["id"], dtype=dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "expected_data, how", + [ + (["a", "b"], "outer"), + ([], "inner"), + (["b"], "right"), + (["a"], "left"), + ], +) +def test_merge_string_dtype(how, expected_data): + # GH#40073 + d1 = DataFrame([("a",)], columns=["id"], dtype="string") + d2 = DataFrame([("b",)], columns=["id"], dtype="string") + result = merge(d1, d2, how=how) + expected = DataFrame(expected_data, columns=["id"], dtype="string") + tm.assert_frame_equal(result, expected) From 0e02e8da84305141a637662e1f5ed4c719cd97d0 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 21 Aug 2021 20:03:12 +0530 Subject: [PATCH 02/15] suggested edits --- pandas/core/reshape/merge.py | 8 +-- pandas/tests/reshape/merge/test_merge.py | 73 +++++++++++------------- 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4d8d36f3b7846..46496b892f172 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -902,21 +902,21 @@ def _maybe_add_join_keys( # error: Item "bool" of "Union[Any, bool]" has no attribute "all" if mask_left.all(): # type: ignore[union-attr] key_col = Index(rvals) - final_dtype = rvals.dtype + result_dtype = rvals.dtype # error: Item "bool" of "Union[Any, bool]" has no attribute "all" elif ( right_indexer is not None and mask_right.all() # type: ignore[union-attr] ): key_col = Index(lvals) - final_dtype = lvals.dtype + result_dtype = lvals.dtype else: key_col = Index(lvals).where(~mask_left, rvals) - final_dtype = lvals.dtype + result_dtype = lvals.dtype if result._is_label_reference(name): result[name] = Series( - key_col, dtype=final_dtype, index=result.index + key_col, dtype=result_dtype, index=result.index ) elif result._is_level_reference(name): if isinstance(result.index, MultiIndex): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2d3b386c55b20..95452d99ad380 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1642,6 +1642,40 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): with pytest.raises(ValueError, match=msg): merge(df2, df1, on=["A"]) + @pytest.mark.parametrize( + "expected_data, how", + [ + ([1, 2], "outer"), + ([], "inner"), + ([2], "right"), + ([1], "left"), + ], + ) + def test_merge_EA_dtype(self, any_numeric_ea_dtype, how, expected_data): + # GH#40073 + d1 = DataFrame([(1,)], columns=["id"], dtype=any_numeric_ea_dtype) + d2 = DataFrame([(2,)], columns=["id"], dtype=any_numeric_ea_dtype) + result = merge(d1, d2, how=how) + expected = DataFrame(expected_data, columns=["id"], dtype=any_numeric_ea_dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "expected_data, how", + [ + (["a", "b"], "outer"), + ([], "inner"), + (["b"], "right"), + (["a"], "left"), + ], + ) + def test_merge_string_dtype(self, how, expected_data, any_string_dtype): + # GH#40073 + d1 = DataFrame([("a",)], columns=["id"], dtype=any_string_dtype) + d2 = DataFrame([("b",)], columns=["id"], dtype=any_string_dtype) + result = merge(d1, d2, how=how) + expected = DataFrame(expected_data, columns=["id"], dtype=any_string_dtype) + tm.assert_frame_equal(result, expected) + @pytest.fixture def left(): @@ -2487,42 +2521,3 @@ def test_mergeerror_on_left_index_mismatched_dtypes(): df_2 = DataFrame(data=["X"], columns=["C"], index=[999]) with pytest.raises(MergeError, match="Can only pass argument"): merge(df_1, df_2, on=["C"], left_index=True) - - -@pytest.mark.parametrize( - "expected_data, how", - [ - ([1, 2], "outer"), - ([], "inner"), - ([2], "right"), - ([1], "left"), - ], -) -@pytest.mark.parametrize( - "dtype", ["Float64", "Float32", "Int64", "Int32", "UInt64", "UInt32"] -) -def test_merge_EA_dtype(dtype, how, expected_data): - # GH#40073 - d1 = DataFrame([(1,)], columns=["id"], dtype=dtype) - d2 = DataFrame([(2,)], columns=["id"], dtype=dtype) - result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype=dtype) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "expected_data, how", - [ - (["a", "b"], "outer"), - ([], "inner"), - (["b"], "right"), - (["a"], "left"), - ], -) -def test_merge_string_dtype(how, expected_data): - # GH#40073 - d1 = DataFrame([("a",)], columns=["id"], dtype="string") - d2 = DataFrame([("b",)], columns=["id"], dtype="string") - result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype="string") - tm.assert_frame_equal(result, expected) From c4619baa2c35e48f114337b911bfc8dd4a765d19 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 22 Aug 2021 22:11:37 +0530 Subject: [PATCH 03/15] updated whatsnew; tweaked failing test --- doc/source/whatsnew/v1.3.3.rst | 4 ++-- pandas/tests/reshape/merge/test_merge.py | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 02ca357ffd584..d79a491d8b5d8 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -17,7 +17,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) -- Fixed regression in :meth:`merge` where columns with ``ExtensionDtype`` was cast to ``object`` in ``left`` and ``outer`` merge (:issue:`40073`) +- Fixed regression in :meth:`merge` where ``on`` columns with few data types (``ExtensionDtype`` and ``bool``) was cast to ``object`` in ``right`` merge (:issue:`40073`) .. --------------------------------------------------------------------------- @@ -25,7 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed bug in :meth:`merge` where ``on`` columns with few data types (``ExtensionDtype`` and ``bool``) was cast to ``object`` in ``outer`` merge (:issue:`40073`) - .. --------------------------------------------------------------------------- diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 95452d99ad380..66434c8233295 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -365,8 +365,10 @@ def test_merge_join_key_dtype_cast(self): assert df["key_0"].dtype == "int64" def test_handle_join_key_pass_array(self): + # changed column `value` from np.arange to list + # to ensure same dtype of column `key` and variable `key` left = DataFrame( - {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] + {"key": [1, 1, 2, 2, 3], "value": [0, 1, 2, 3, 4]}, columns=["value", "key"] ) right = DataFrame({"rvalue": np.arange(6)}) key = np.array([1, 1, 2, 3, 4, 5]) @@ -1676,6 +1678,23 @@ def test_merge_string_dtype(self, how, expected_data, any_string_dtype): expected = DataFrame(expected_data, columns=["id"], dtype=any_string_dtype) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "how, expected_data", + [ + ("inner", [[True, 1, 4], [False, 5, 3]]), + ("outer", [[True, 1, 4], [False, 5, 3]]), + ("left", [[True, 1, 4], [False, 5, 3]]), + ("right", [[False, 5, 3], [True, 1, 4]]), + ], + ) + def test_merge_bool_dtype(self, how, expected_data): + # GH#40073 + df1 = DataFrame({"A": [True, False], "B": [1, 5]}) + df2 = DataFrame({"A": [False, True], "C": [3, 4]}) + result = merge(df1, df2, how=how) + expected = DataFrame(expected_data, columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + @pytest.fixture def left(): From 9010b113aebe6b977a661de01062c83334d619d8 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sun, 22 Aug 2021 23:25:13 +0530 Subject: [PATCH 04/15] failing test tweak --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 66434c8233295..82df05f3e24ed 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -370,7 +370,7 @@ def test_handle_join_key_pass_array(self): left = DataFrame( {"key": [1, 1, 2, 2, 3], "value": [0, 1, 2, 3, 4]}, columns=["value", "key"] ) - right = DataFrame({"rvalue": np.arange(6)}) + right = DataFrame({"rvalue": [0, 1, 2, 3, 4, 5]}) key = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on="key", right_on=key, how="outer") From 2a7838c521231b40be459edf746d6ef42cb9d85d Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 23 Aug 2021 00:06:00 +0530 Subject: [PATCH 05/15] reverted failed tweaked test --- pandas/tests/reshape/merge/test_merge.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 82df05f3e24ed..4c271f53b6415 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -365,12 +365,10 @@ def test_merge_join_key_dtype_cast(self): assert df["key_0"].dtype == "int64" def test_handle_join_key_pass_array(self): - # changed column `value` from np.arange to list - # to ensure same dtype of column `key` and variable `key` left = DataFrame( - {"key": [1, 1, 2, 2, 3], "value": [0, 1, 2, 3, 4]}, columns=["value", "key"] + {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] ) - right = DataFrame({"rvalue": [0, 1, 2, 3, 4, 5]}) + right = DataFrame({"rvalue": np.arange(6)}) key = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on="key", right_on=key, how="outer") From e38ff200e07e193da55f940334b6a47c8172d53a Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 30 Aug 2021 22:23:41 +0530 Subject: [PATCH 06/15] xfailed test_handle_join_key_pass_array in 32bit --- pandas/tests/reshape/merge/test_merge.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 4c271f53b6415..119193f1b0d4d 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -9,6 +9,8 @@ import numpy as np import pytest +from pandas.compat import IS64 + from pandas.core.dtypes.common import ( is_categorical_dtype, is_object_dtype, @@ -364,6 +366,7 @@ def test_merge_join_key_dtype_cast(self): df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") assert df["key_0"].dtype == "int64" + @pytest.mark.xfail(not IS64, reason="GH#40073: fail on 32-bit system") def test_handle_join_key_pass_array(self): left = DataFrame( {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] From f2f7aac9a7f3e6869c795c9171b0fd312b57b37e Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Tue, 31 Aug 2021 00:08:44 +0530 Subject: [PATCH 07/15] skipif --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 119193f1b0d4d..4b25819ba062f 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -366,7 +366,7 @@ def test_merge_join_key_dtype_cast(self): df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") assert df["key_0"].dtype == "int64" - @pytest.mark.xfail(not IS64, reason="GH#40073: fail on 32-bit system") + @pytest.mark.skipif(not IS64, reason="GH#40073: fail on 32-bit system") def test_handle_join_key_pass_array(self): left = DataFrame( {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] From 55add8b26a2c0cb31278d0e14a3aa95d5f603a46 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Tue, 31 Aug 2021 01:00:49 +0530 Subject: [PATCH 08/15] skipif windows --- pandas/tests/reshape/merge/test_merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 4b25819ba062f..6053ca5f36b28 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from pandas.compat import IS64 +from pandas.compat import is_platform_windows from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -366,7 +366,7 @@ def test_merge_join_key_dtype_cast(self): df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") assert df["key_0"].dtype == "int64" - @pytest.mark.skipif(not IS64, reason="GH#40073: fail on 32-bit system") + @pytest.mark.skipif(not is_platform_windows(), reason="GH#40073: fail on Windows") def test_handle_join_key_pass_array(self): left = DataFrame( {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] From 6db265add2ed507942b9bd79e7187cf202bf6e26 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Tue, 31 Aug 2021 01:03:11 +0530 Subject: [PATCH 09/15] skipif windows & 32 --- pandas/tests/reshape/merge/test_merge.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 6053ca5f36b28..7eaf045491087 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -9,7 +9,10 @@ import numpy as np import pytest -from pandas.compat import is_platform_windows +from pandas.compat import ( + IS64, + is_platform_windows, +) from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -366,7 +369,9 @@ def test_merge_join_key_dtype_cast(self): df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") assert df["key_0"].dtype == "int64" - @pytest.mark.skipif(not is_platform_windows(), reason="GH#40073: fail on Windows") + @pytest.mark.skipif( + not (is_platform_windows() or IS64), reason="GH#40073: fail on Windows" + ) def test_handle_join_key_pass_array(self): left = DataFrame( {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] From b34e5622907b0b7f5c5ed89abac01b5fb7bd1dd1 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Tue, 31 Aug 2021 22:23:07 +0530 Subject: [PATCH 10/15] changed or to and --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7eaf045491087..3d56a3b9e9709 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -370,7 +370,7 @@ def test_merge_join_key_dtype_cast(self): assert df["key_0"].dtype == "int64" @pytest.mark.skipif( - not (is_platform_windows() or IS64), reason="GH#40073: fail on Windows" + not (is_platform_windows() and IS64), reason="GH#40073: fail on Windows/32bit" ) def test_handle_join_key_pass_array(self): left = DataFrame( From 81360aafcf8174d79c04007b259c7c90309c5207 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Tue, 31 Aug 2021 23:59:52 +0530 Subject: [PATCH 11/15] xfail on windows/32bit --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 3d56a3b9e9709..1bd8bc55499f0 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -370,7 +370,7 @@ def test_merge_join_key_dtype_cast(self): assert df["key_0"].dtype == "int64" @pytest.mark.skipif( - not (is_platform_windows() and IS64), reason="GH#40073: fail on Windows/32bit" + (is_platform_windows() or not IS64), reason="GH#40073: fail on Windows/32bit" ) def test_handle_join_key_pass_array(self): left = DataFrame( From a08869602c1e0b9a3580623045e278b2a4349f07 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 1 Sep 2021 00:00:23 +0530 Subject: [PATCH 12/15] xfail on windows/32bit --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 1bd8bc55499f0..5ac46d9aafb91 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -369,7 +369,7 @@ def test_merge_join_key_dtype_cast(self): df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") assert df["key_0"].dtype == "int64" - @pytest.mark.skipif( + @pytest.mark.xfail( (is_platform_windows() or not IS64), reason="GH#40073: fail on Windows/32bit" ) def test_handle_join_key_pass_array(self): From b864d925e3c779853eb6f0f8380190375a83a31e Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 1 Sep 2021 01:34:00 +0530 Subject: [PATCH 13/15] updated whatsnew as suggested --- doc/source/whatsnew/v1.3.3.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index e26f6bc500e26..dd7a4edb5586a 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -17,7 +17,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) -- Fixed regression in :meth:`merge` where ``on`` columns with few data types (``ExtensionDtype`` and ``bool``) was cast to ``object`` in ``right`` merge (:issue:`40073`) +- Fixed regression in :meth:`merge` where ``on`` columns with few data types (``ExtensionDtype`` and ``bool``) was cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`) .. --------------------------------------------------------------------------- @@ -36,7 +36,6 @@ Performance improvements Bug fixes ~~~~~~~~~ -- Fixed bug in :meth:`merge` where ``on`` columns with few data types (``ExtensionDtype`` and ``bool``) was cast to ``object`` in ``outer`` merge (:issue:`40073`) - Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) - From de0e84da2cd8bb6ce89aa026d58edb4d7c834a33 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Wed, 1 Sep 2021 11:28:05 +0530 Subject: [PATCH 14/15] explicitly int64 --- pandas/tests/reshape/merge/test_merge.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 5ac46d9aafb91..54fc978d64336 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -9,11 +9,6 @@ import numpy as np import pytest -from pandas.compat import ( - IS64, - is_platform_windows, -) - from pandas.core.dtypes.common import ( is_categorical_dtype, is_object_dtype, @@ -369,15 +364,14 @@ def test_merge_join_key_dtype_cast(self): df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") assert df["key_0"].dtype == "int64" - @pytest.mark.xfail( - (is_platform_windows() or not IS64), reason="GH#40073: fail on Windows/32bit" - ) def test_handle_join_key_pass_array(self): left = DataFrame( - {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] + {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, + columns=["value", "key"], + dtype="int64", ) - right = DataFrame({"rvalue": np.arange(6)}) - key = np.array([1, 1, 2, 3, 4, 5]) + right = DataFrame({"rvalue": np.arange(6)}, dtype="int64") + key = np.array([1, 1, 2, 3, 4, 5], dtype="int64") merged = merge(left, right, left_on="key", right_on=key, how="outer") merged2 = merge(right, left, left_on=key, right_on="key", how="outer") From c34dc5815a88c54ce1a4d692fffcd46448a4ae76 Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Thu, 2 Sep 2021 10:12:25 +0530 Subject: [PATCH 15/15] Update doc/source/whatsnew/v1.3.3.rst Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> --- doc/source/whatsnew/v1.3.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index dd7a4edb5586a..da56c13cf7a6a 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -17,7 +17,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) -- Fixed regression in :meth:`merge` where ``on`` columns with few data types (``ExtensionDtype`` and ``bool``) was cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) +- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`) .. ---------------------------------------------------------------------------