Skip to content

Commit a23c15c

Browse files
Backport PR #53233 on branch 2.0.x (BUG: preserve dtype for right/outer merge of datetime with different resolutions) (#53275)
Backport PR #53233: BUG: preserve dtype for right/outer merge of datetime with different resolutions

Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 340346c commit a23c15c

File tree

2 files changed

+29
-18
lines changed

2 files changed

+29
-18
lines changed

pandas/core/reshape/merge.py

+8
Original file line number | Diff line number | Diff line change
@@ -1000,6 +1000,14 @@ def _maybe_add_join_keys(
10001000
else:
10011001
key_col = Index(lvals).where(~mask_left, rvals)
10021002
result_dtype = find_common_type([lvals.dtype, rvals.dtype])
1003+
if (
1004+
lvals.dtype.kind == "M"
1005+
and rvals.dtype.kind == "M"
1006+
and result_dtype.kind == "O"
1007+
):
1008+
# TODO(non-nano) Workaround for common_type not dealing
1009+
# with different resolutions
1010+
result_dtype = key_col.dtype
10031011

10041012
if result._is_label_reference(name):
10051013
result[name] = Series(

pandas/tests/reshape/merge/test_merge.py

+21 -18
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77

88
import numpy as np
99
import pytest
10-
import pytz
1110

1211
from pandas.core.dtypes.common import (
1312
is_categorical_dtype,
@@ -2753,24 +2752,28 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
27532752
tm.assert_frame_equal(result, expected)
27542753

27552754

2756-
@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
2757-
def test_merge_datetime_different_resolution(tzinfo):
2755+
@pytest.mark.parametrize("how", ["inner", "left", "outer", "right"])
2756+
@pytest.mark.parametrize("tz", [None, "America/Chicago"])
2757+
def test_merge_datetime_different_resolution(tz, how):
27582758
# https://github.com/pandas-dev/pandas/issues/53200
2759-
df1 = DataFrame(
2760-
{
2761-
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")],
2762-
"a": [1],
2763-
}
2764-
)
2765-
df2 = df1.copy()
2759+
vals = [
2760+
pd.Timestamp(2023, 5, 12, tz=tz),
2761+
pd.Timestamp(2023, 5, 13, tz=tz),
2762+
pd.Timestamp(2023, 5, 14, tz=tz),
2763+
]
2764+
df1 = DataFrame({"t": vals[:2], "a": [1.0, 2.0]})
2765+
df1["t"] = df1["t"].dt.as_unit("ns")
2766+
df2 = DataFrame({"t": vals[1:], "b": [1.0, 2.0]})
27662767
df2["t"] = df2["t"].dt.as_unit("s")
27672768

2768-
expected = DataFrame(
2769-
{
2770-
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)],
2771-
"a_x": [1],
2772-
"a_y": [1],
2773-
}
2774-
)
2775-
result = df1.merge(df2, on="t")
2769+
expected = DataFrame({"t": vals, "a": [1.0, 2.0, np.nan], "b": [np.nan, 1.0, 2.0]})
2770+
expected["t"] = expected["t"].dt.as_unit("ns")
2771+
if how == "inner":
2772+
expected = expected.iloc[[1]].reset_index(drop=True)
2773+
elif how == "left":
2774+
expected = expected.iloc[[0, 1]]
2775+
elif how == "right":
2776+
expected = expected.iloc[[1, 2]].reset_index(drop=True)
2777+
2778+
result = df1.merge(df2, on="t", how=how)
27762779
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)