Skip to content

Commit 26ed455

Browse files
jorisvandenbossche authored and topper-123 committed
BUG: preserve dtype for right/outer merge of datetime with different resolutions (pandas-dev#53233)
1 parent f2f4072 commit 26ed455

File tree

2 files changed

+29
-18
lines changed

2 files changed

+29
-18
lines changed

pandas/core/reshape/merge.py

+8
Original file line number | Diff line number | Diff line change
@@ -994,6 +994,14 @@ def _maybe_add_join_keys(
994994
else:
995995
key_col = Index(lvals).where(~mask_left, rvals)
996996
result_dtype = find_common_type([lvals.dtype, rvals.dtype])
997+
if (
998+
lvals.dtype.kind == "M"
999+
and rvals.dtype.kind == "M"
1000+
and result_dtype.kind == "O"
1001+
):
1002+
# TODO(non-nano) Workaround for common_type not dealing
1003+
# with different resolutions
1004+
result_dtype = key_col.dtype
9971005

9981006
if result._is_label_reference(name):
9991007
result[name] = result._constructor_sliced(

pandas/tests/reshape/merge/test_merge.py

+21 -18
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77

88
import numpy as np
99
import pytest
10-
import pytz
1110

1211
from pandas.core.dtypes.common import is_object_dtype
1312
from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -2776,26 +2775,30 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
27762775
tm.assert_frame_equal(result, expected)
27772776

27782777

2779-
@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
2780-
def test_merge_datetime_different_resolution(tzinfo):
2778+
@pytest.mark.parametrize("how", ["inner", "left", "outer", "right"])
2779+
@pytest.mark.parametrize("tz", [None, "America/Chicago"])
2780+
def test_merge_datetime_different_resolution(tz, how):
27812781
# https://github.com/pandas-dev/pandas/issues/53200
2782-
df1 = DataFrame(
2783-
{
2784-
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")],
2785-
"a": [1],
2786-
}
2787-
)
2788-
df2 = df1.copy()
2782+
vals = [
2783+
pd.Timestamp(2023, 5, 12, tz=tz),
2784+
pd.Timestamp(2023, 5, 13, tz=tz),
2785+
pd.Timestamp(2023, 5, 14, tz=tz),
2786+
]
2787+
df1 = DataFrame({"t": vals[:2], "a": [1.0, 2.0]})
2788+
df1["t"] = df1["t"].dt.as_unit("ns")
2789+
df2 = DataFrame({"t": vals[1:], "b": [1.0, 2.0]})
27892790
df2["t"] = df2["t"].dt.as_unit("s")
27902791

2791-
expected = DataFrame(
2792-
{
2793-
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)],
2794-
"a_x": [1],
2795-
"a_y": [1],
2796-
}
2797-
)
2798-
result = df1.merge(df2, on="t")
2792+
expected = DataFrame({"t": vals, "a": [1.0, 2.0, np.nan], "b": [np.nan, 1.0, 2.0]})
2793+
expected["t"] = expected["t"].dt.as_unit("ns")
2794+
if how == "inner":
2795+
expected = expected.iloc[[1]].reset_index(drop=True)
2796+
elif how == "left":
2797+
expected = expected.iloc[[0, 1]]
2798+
elif how == "right":
2799+
expected = expected.iloc[[1, 2]].reset_index(drop=True)
2800+
2801+
result = df1.merge(df2, on="t", how=how)
27992802
tm.assert_frame_equal(result, expected)
28002803

28012804

0 commit comments

Comments
 (0)