Skip to content

Commit 94a6d6a

Browse files
authored
BUG: merge_asof(tolerance=Timedelta) with ArrowDtype (#56488)
1 parent 6399982 commit 94a6d6a

File tree

3 files changed

+36
-4
lines changed

3 files changed

+36
-4
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,7 @@ Reshaping
666666
- Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`)
667667
- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
668668
- Bug in :func:`merge_asof` raising incorrect error for string dtype (:issue:`56444`)
669+
- Bug in :func:`merge_asof` when using a :class:`Timedelta` tolerance on an :class:`ArrowDtype` column (:issue:`56486`)
669670
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
670671
- Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`)
671672
- Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`)

pandas/core/reshape/merge.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -2071,7 +2071,9 @@ def _validate_tolerance(self, left_join_keys: list[ArrayLike]) -> None:
20712071
f"with type {repr(lt.dtype)}"
20722072
)
20732073

2074-
if needs_i8_conversion(lt.dtype):
2074+
if needs_i8_conversion(lt.dtype) or (
2075+
isinstance(lt, ArrowExtensionArray) and lt.dtype.kind in "mM"
2076+
):
20752077
if not isinstance(self.tolerance, datetime.timedelta):
20762078
raise MergeError(msg)
20772079
if self.tolerance < Timedelta(0):
@@ -2137,15 +2139,21 @@ def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]
21372139
if tolerance is not None:
21382140
# TODO: can we reuse a tolerance-conversion function from
21392141
# e.g. TimedeltaIndex?
2140-
if needs_i8_conversion(left_values.dtype):
2142+
if needs_i8_conversion(left_values.dtype) or (
2143+
isinstance(left_values, ArrowExtensionArray)
2144+
and left_values.dtype.kind in "mM"
2145+
):
21412146
tolerance = Timedelta(tolerance)
21422147
# TODO: we have no test cases with PeriodDtype here; probably
21432148
# need to adjust tolerance for that case.
21442149
if left_values.dtype.kind in "mM":
21452150
# Make sure the i8 representation for tolerance
21462151
# matches that for left_values/right_values.
2147-
lvs = ensure_wrapped_if_datetimelike(left_values)
2148-
tolerance = tolerance.as_unit(lvs.unit)
2152+
if isinstance(left_values, ArrowExtensionArray):
2153+
unit = left_values.dtype.pyarrow_dtype.unit
2154+
else:
2155+
unit = ensure_wrapped_if_datetimelike(left_values).unit
2156+
tolerance = tolerance.as_unit(unit)
21492157

21502158
tolerance = tolerance._value
21512159

pandas/tests/reshape/merge/test_merge_asof.py

+23
Original file line numberDiff line numberDiff line change
@@ -3577,6 +3577,29 @@ def test_merge_asof_extension_dtype(dtype):
35773577
tm.assert_frame_equal(result, expected)
35783578

35793579

3580+
@td.skip_if_no("pyarrow")
3581+
def test_merge_asof_pyarrow_td_tolerance():
3582+
# GH 56486: merge_asof with a Timedelta tolerance on a pyarrow timestamp "on" column
3583+
ser = pd.Series(
3584+
[datetime.datetime(2023, 1, 1)], dtype="timestamp[us, UTC][pyarrow]"
3585+
)
3586+
df = pd.DataFrame(
3587+
{
3588+
"timestamp": ser,
3589+
"value": [1],
3590+
}
3591+
)
3592+
result = merge_asof(df, df, on="timestamp", tolerance=Timedelta("1s"))
3593+
expected = pd.DataFrame(
3594+
{
3595+
"timestamp": ser,
3596+
"value_x": [1],
3597+
"value_y": [1],
3598+
}
3599+
)
3600+
tm.assert_frame_equal(result, expected)
3601+
3602+
35803603
def test_merge_asof_read_only_ndarray():
35813604
# GH 53513
35823605
left = pd.Series([2], index=[2], name="left")

0 commit comments

Comments
 (0)