Skip to content

Commit def01cf

Browse files
ianzur authored and TomAugspurger committed
BUG: timedelta merge asof with tolerance (pandas-dev#27650)
* issue pandas-dev#27642 - timedelta merge asof with tolerance
1 parent b1c871c commit def01cf

File tree

3 files changed

+53
-6
lines changed

3 files changed

+53
-6
lines changed

doc/source/whatsnew/v0.25.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ Reshaping
9595
^^^^^^^^^
9696

9797
- A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`)
98+
- Bug in :meth:`merge_asof` where :class:`Timedelta` objects could not be merged when passing the ``tolerance`` keyword argument (:issue:`27642`)
9899
- Bug in :meth:`DataFrame.crosstab` where an error is raised when ``margins`` is set to ``True`` and ``normalize`` is not ``False`` (:issue:`27500`)
99100
- :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`)
100101
- Bug in :meth:`DataFrame.join` raising with readonly arrays (:issue:`27943`)

pandas/core/reshape/merge.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
is_bool,
2323
is_bool_dtype,
2424
is_categorical_dtype,
25-
is_datetime64_dtype,
2625
is_datetime64tz_dtype,
2726
is_datetimelike,
2827
is_dtype_equal,
@@ -1635,7 +1634,7 @@ def _get_merge_keys(self):
16351634
)
16361635
)
16371636

1638-
if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt):
1637+
if is_datetimelike(lt):
16391638
if not isinstance(self.tolerance, Timedelta):
16401639
raise MergeError(msg)
16411640
if self.tolerance < Timedelta(0):

pandas/tests/reshape/merge/test_merge_asof.py

+51-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import datetime
2+
13
import numpy as np
24
import pytest
35
import pytz
@@ -588,14 +590,23 @@ def test_non_sorted(self):
588590
# ok, though has dupes
589591
merge_asof(trades, self.quotes, on="time", by="ticker")
590592

591-
def test_tolerance(self):
593+
@pytest.mark.parametrize(
594+
"tolerance",
595+
[
596+
Timedelta("1day"),
597+
pytest.param(
598+
datetime.timedelta(days=1),
599+
marks=pytest.mark.xfail(reason="not implemented", strict=True),
600+
),
601+
],
602+
ids=["pd.Timedelta", "datetime.timedelta"],
603+
)
604+
def test_tolerance(self, tolerance):
592605

593606
trades = self.trades
594607
quotes = self.quotes
595608

596-
result = merge_asof(
597-
trades, quotes, on="time", by="ticker", tolerance=Timedelta("1day")
598-
)
609+
result = merge_asof(trades, quotes, on="time", by="ticker", tolerance=tolerance)
599610
expected = self.tolerance
600611
assert_frame_equal(result, expected)
601612

@@ -1246,3 +1257,39 @@ def test_by_mixed_tz_aware(self):
12461257
)
12471258
expected["value_y"] = np.array([np.nan], dtype=object)
12481259
assert_frame_equal(result, expected)
1260+
1261+
def test_timedelta_tolerance_nearest(self):
1262+
# GH 27642
1263+
1264+
left = pd.DataFrame(
1265+
list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])),
1266+
columns=["time", "left"],
1267+
)
1268+
1269+
left["time"] = pd.to_timedelta(left["time"], "ms")
1270+
1271+
right = pd.DataFrame(
1272+
list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])),
1273+
columns=["time", "right"],
1274+
)
1275+
1276+
right["time"] = pd.to_timedelta(right["time"], "ms")
1277+
1278+
expected = pd.DataFrame(
1279+
list(
1280+
zip(
1281+
[0, 5, 10, 15, 20, 25],
1282+
[0, 1, 2, 3, 4, 5],
1283+
[0, np.nan, 2, 4, np.nan, np.nan],
1284+
)
1285+
),
1286+
columns=["time", "left", "right"],
1287+
)
1288+
1289+
expected["time"] = pd.to_timedelta(expected["time"], "ms")
1290+
1291+
result = pd.merge_asof(
1292+
left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest"
1293+
)
1294+
1295+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)