Skip to content

Commit 7a28ced

Browse files
phofl and glemaitre authored
Backport PR #53213 on branch 2.0.x (FIX preserve dtype with datetime columns of different resolution when merging) (#53228)
FIX preserve dtype with datetime columns of different resolution when merging (#53213) (cherry picked from commit 935244a) Co-authored-by: Guillaume Lemaitre <[email protected]>
1 parent cc47ec2 commit 7a28ced

File tree

3 files changed

+32
-1
lines changed

3 files changed

+32
-1
lines changed

doc/source/whatsnew/v2.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Bug fixes
2828
- Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`)
2929
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame` objects of incorrect sizes when called on slices (:issue:`52824`)
3030
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
31+
- Bug in :func:`merge` when merging on datetime columns with different resolutions (:issue:`53200`)
3132
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords set to ``False`` when ``dtype_backend="pyarrow"`` (:issue:`52872`)
3233
- Bug in :meth:`Series.describe` treating pyarrow-backed timestamps and timedeltas as categorical data (:issue:`53001`)
3334
- Bug in :meth:`Series.rename` not making a lazy copy when Copy-on-Write is enabled when a scalar is passed to it (:issue:`52450`)

pandas/core/reshape/merge.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1401,6 +1401,12 @@ def _maybe_coerce_merge_keys(self) -> None:
14011401
rk.dtype, DatetimeTZDtype
14021402
):
14031403
raise ValueError(msg)
1404+
elif (
1405+
isinstance(lk.dtype, DatetimeTZDtype)
1406+
and isinstance(rk.dtype, DatetimeTZDtype)
1407+
) or (lk.dtype.kind == "M" and rk.dtype.kind == "M"):
1408+
# allows datetime with different resolutions
1409+
continue
14041410

14051411
elif lk_is_object and rk_is_object:
14061412
continue
@@ -2355,7 +2361,7 @@ def _factorize_keys(
23552361
if isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype):
23562362
# Extract the ndarray (UTC-localized) values
23572363
# Note: we don't need the dtypes to match, as these can still be compared
2358-
# TODO(non-nano): need to make sure resolutions match
2364+
lk, rk = cast("DatetimeArray", lk)._ensure_matching_resos(rk)
23592365
lk = cast("DatetimeArray", lk)._ndarray
23602366
rk = cast("DatetimeArray", rk)._ndarray
23612367

pandas/tests/reshape/merge/test_merge.py

+24
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import numpy as np
99
import pytest
10+
import pytz
1011

1112
from pandas.core.dtypes.common import (
1213
is_categorical_dtype,
@@ -2750,3 +2751,26 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
27502751
result = df2.merge(df)
27512752
expected = df2.copy()
27522753
tm.assert_frame_equal(result, expected)
2754+
2755+
2756+
@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
2757+
def test_merge_datetime_different_resolution(tzinfo):
2758+
# https://github.com/pandas-dev/pandas/issues/53200
2759+
df1 = DataFrame(
2760+
{
2761+
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")],
2762+
"a": [1],
2763+
}
2764+
)
2765+
df2 = df1.copy()
2766+
df2["t"] = df2["t"].dt.as_unit("s")
2767+
2768+
expected = DataFrame(
2769+
{
2770+
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)],
2771+
"a_x": [1],
2772+
"a_y": [1],
2773+
}
2774+
)
2775+
result = df1.merge(df2, on="t")
2776+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)