diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 649ad37a56b35..f95c9465859cb 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -761,6 +761,7 @@ Datetimelike - Bug in parsing datetime strings with nanosecond resolution with non-ISO8601 formats incorrectly truncating sub-microsecond components (:issue:`56051`) - Bug in parsing datetime strings with sub-second resolution and trailing zeros incorrectly inferring second or millisecond resolution (:issue:`55737`) - Bug in the results of :func:`to_datetime` with an floating-dtype argument with ``unit`` not matching the pointwise results of :class:`Timestamp` (:issue:`56037`) +- Fixed bug in :meth:`DataFrame.merge` not being able to join on ``datetime64`` columns of differing resolutions (:issue:`55212`) - Fixed regression where :func:`concat` would raise an error when concatenating ``datetime64`` columns with differing resolutions (:issue:`53641`) Timedelta diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4aff99dc42250..6e64a60ae4502 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1083,18 +1083,11 @@ def _maybe_add_join_keys( result_dtype = lvals.dtype else: key_col = Index(lvals) + result_dtype = find_common_type([lvals.dtype, rvals.dtype]) + key_col = key_col.astype(result_dtype, copy=False) if left_indexer is not None: mask_left = left_indexer == -1 key_col = key_col.where(~mask_left, rvals) - result_dtype = find_common_type([lvals.dtype, rvals.dtype]) - if ( - lvals.dtype.kind == "M" - and rvals.dtype.kind == "M" - and result_dtype.kind == "O" - ): - # TODO(non-nano) Workaround for common_type not dealing - # with different resolutions - result_dtype = key_col.dtype if result._is_label_reference(name): result[name] = result._constructor_sliced( diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 2505d9163a6d2..cf3cc3eb8bdc4 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2816,7 +2816,8 @@ def test_merge_arrow_and_numpy_dtypes(dtype): @pytest.mark.parametrize("how", ["inner", "left", "outer", "right"]) @pytest.mark.parametrize("tz", [None, "America/Chicago"]) -def test_merge_datetime_different_resolution(tz, how): +@pytest.mark.parametrize("unit", ["us", "ms", "s"]) +def test_merge_datetime_different_resolution(tz, how, unit): # https://github.com/pandas-dev/pandas/issues/53200 vals = [ pd.Timestamp(2023, 5, 12, tz=tz), @@ -2826,19 +2827,31 @@ def test_merge_datetime_different_resolution(tz, how): df1 = DataFrame({"t": vals[:2], "a": [1.0, 2.0]}) df1["t"] = df1["t"].dt.as_unit("ns") df2 = DataFrame({"t": vals[1:], "b": [1.0, 2.0]}) - df2["t"] = df2["t"].dt.as_unit("s") + df2["t"] = df2["t"].dt.as_unit(unit) expected = DataFrame({"t": vals, "a": [1.0, 2.0, np.nan], "b": [np.nan, 1.0, 2.0]}) expected["t"] = expected["t"].dt.as_unit("ns") if how == "inner": - expected = expected.iloc[[1]].reset_index(drop=True) + expected1 = expected.iloc[[1]].reset_index(drop=True) + expected2 = expected1 elif how == "left": - expected = expected.iloc[[0, 1]] + expected1 = expected.iloc[[0, 1]] + expected2 = expected.iloc[[1, 2]].reset_index(drop=True) elif how == "right": - expected = expected.iloc[[1, 2]].reset_index(drop=True) + expected1 = expected.iloc[[1, 2]].reset_index(drop=True) + expected2 = expected.iloc[[0, 1]] + else: + expected1 = expected + expected2 = expected result = df1.merge(df2, on="t", how=how) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected1) + + # Check lower resolution to higher resolution also works + # GH55212 + expected2 = expected2[["t", "b", "a"]] + result1 = df2.merge(df1, on="t", how=how) + tm.assert_frame_equal(result1, expected2) def test_merge_multiindex_single_level():