Skip to content

Commit 935244a

Browse files
authored
FIX preserve dtype with datetime columns of different resolution when merging (pandas-dev#53213)
1 parent 3cfd868 commit 935244a

File tree

3 files changed

+32
-1
lines changed

3 files changed

+32
-1
lines changed

doc/source/whatsnew/v2.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Bug fixes
2828
- Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`)
2929
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame` objects of incorrect sizes when called on slices (:issue:`52824`)
3030
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
31+
- Bug in :func:`merge` when merging on datetime columns with different resolutions (:issue:`53200`)
3132
- Bug in :func:`to_timedelta` was raising ``ValueError`` with ``pandas.NA`` (:issue:`52909`)
3233
- Bug in :meth:`DataFrame.__getitem__` not preserving dtypes for :class:`MultiIndex` partial keys (:issue:`51895`)
3334
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords set to ``False`` when ``dtype_backend="pyarrow"`` (:issue:`52872`)

pandas/core/reshape/merge.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1395,6 +1395,12 @@ def _maybe_coerce_merge_keys(self) -> None:
13951395
rk.dtype, DatetimeTZDtype
13961396
):
13971397
raise ValueError(msg)
1398+
elif (
1399+
isinstance(lk.dtype, DatetimeTZDtype)
1400+
and isinstance(rk.dtype, DatetimeTZDtype)
1401+
) or (lk.dtype.kind == "M" and rk.dtype.kind == "M"):
1402+
# allows datetime with different resolutions
1403+
continue
13981404

13991405
elif lk_is_object and rk_is_object:
14001406
continue
@@ -2352,7 +2358,7 @@ def _factorize_keys(
23522358
if isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype):
23532359
# Extract the ndarray (UTC-localized) values
23542360
# Note: we don't need the dtypes to match, as these can still be compared
2355-
# TODO(non-nano): need to make sure resolutions match
2361+
lk, rk = cast("DatetimeArray", lk)._ensure_matching_resos(rk)
23562362
lk = cast("DatetimeArray", lk)._ndarray
23572363
rk = cast("DatetimeArray", rk)._ndarray
23582364

pandas/tests/reshape/merge/test_merge.py

+24
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import numpy as np
99
import pytest
10+
import pytz
1011

1112
from pandas.core.dtypes.common import is_object_dtype
1213
from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -2773,3 +2774,26 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
27732774
result = df2.merge(df)
27742775
expected = df2.copy()
27752776
tm.assert_frame_equal(result, expected)
2777+
2778+
2779+
@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
2780+
def test_merge_datetime_different_resolution(tzinfo):
2781+
# https://github.com/pandas-dev/pandas/issues/53200
2782+
df1 = DataFrame(
2783+
{
2784+
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")],
2785+
"a": [1],
2786+
}
2787+
)
2788+
df2 = df1.copy()
2789+
df2["t"] = df2["t"].dt.as_unit("s")
2790+
2791+
expected = DataFrame(
2792+
{
2793+
"t": [pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)],
2794+
"a_x": [1],
2795+
"a_y": [1],
2796+
}
2797+
)
2798+
result = df1.merge(df2, on="t")
2799+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)