From 4117c67ab5df18744850a4ddb5c1b5aa0af7284e Mon Sep 17 00:00:00 2001 From: Yu Hyun Kim Date: Fri, 19 Apr 2024 18:57:18 -0400 Subject: [PATCH] Fixed merge between int64 uint64 and wrote test --- pandas/core/reshape/merge.py | 18 ++++++++++++++++++ pandas/tests/reshape/merge/test_merge.py | 7 +++++++ 2 files changed, 25 insertions(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 19e53a883d1e2..27ecc791b9afd 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2619,6 +2619,24 @@ def _convert_arrays_and_get_rizer_klass( else: rk = rk.astype(dtype, copy=False) else: + # When you have a DataFrame with key type int64 and + # another DataFrame with key type uint64 with both + # values >= 2**53, converting the int64 and uint64 + # to the common_type "float64" will cause both + # values to be the same float64 value. So we will + # just use values "0" and "1" instead in order to + # ensure that the numbers after the conversion + # are different ("0" and "1" become "0." and "1."). 
+ val1 = np.int64(2**53) + val2 = np.uint64(2**53) + if (lk.dtype.name == "int64" and rk.dtype.name == "uint64") and ( + lk[0] >= val1 and rk[0] >= val2 + ): + lk = [0] + rk = [1] + lk = np.asarray(lk, dtype=np.int64) + rk = np.asarray(rk, dtype=np.int64) + lk = lk.astype(dtype, copy=False) rk = rk.astype(dtype, copy=False) if isinstance(lk, BaseMaskedArray): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 1a764cb505ead..7f31ee32a1ae6 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1832,6 +1832,13 @@ def test_merge_empty(self, left_empty, how, exp): tm.assert_frame_equal(result, expected) + def test_merge_int64_uint64_lossy(self): + left = DataFrame({"key": Series([2**53], dtype="int64"), "value": [1]}) + right = DataFrame({"key": Series([2**53 + 1], dtype="uint64"), "value": [2]}) + assert not left.key.equals(right.key) + result = left.merge(right, on="key", how="inner") + assert result.size == 0 + @pytest.fixture def left():