Skip to content

BUG: merge_asof with non-nano #50835

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def _unbox_scalar(self, value) -> np.timedelta64:
raise ValueError("'value' should be a Timedelta.")
self._check_compatible_with(value)
if value is NaT:
return np.timedelta64(value.value, "ns")
return np.timedelta64(value.value, self.unit)
else:
return value.as_unit(self.unit).asm8

Expand Down
21 changes: 18 additions & 3 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
)
from pandas.core.frame import _merge_doc
from pandas.core.indexes.api import default_index
from pandas.core.sorting import is_int64_overflow_possible
Expand Down Expand Up @@ -2109,12 +2112,24 @@ def injection(obj):

# initial type conversion as needed
if needs_i8_conversion(left_values):
left_values = left_values.view("i8")
right_values = right_values.view("i8")
if tolerance is not None:
tolerance = Timedelta(tolerance)

# TODO: we have no test cases with PeriodDtype here; probably
# need to adjust tolerance for that case.
if left_values.dtype.kind in ["m", "M"]:
# Make sure the i8 representation for tolerance
# matches that for left_values/right_values.
lvs = ensure_wrapped_if_datetimelike(left_values)
tolerance = tolerance.as_unit(lvs.unit)

tolerance = tolerance.value

# TODO: require left_values.dtype == right_values.dtype, or at least
# comparable for e.g. dt64tz
left_values = left_values.view("i8")
right_values = right_values.view("i8")

# a "by" parameter requires special handling
if self.left_by is not None:
# remove 'on' parameter from values if one existed
Expand Down
54 changes: 40 additions & 14 deletions pandas/tests/reshape/merge/test_merge_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@
from pandas.core.reshape.merge import MergeError


@pytest.fixture(params=["s", "ms", "us", "ns"])
def unit(request):
    """
    Resolution for datetimelike dtypes.

    Parametrized fixture: a test requesting ``unit`` runs once for each
    of "s", "ms", "us" and "ns", receiving the resolution as a string.
    """
    # ``request.param`` holds the entry of ``params`` for the current run.
    resolution = request.param
    return resolution


class TestAsOfMerge:
def read_data(self, datapath, name, dedupe=False):
path = datapath("reshape", "merge", "data", name)
Expand Down Expand Up @@ -63,8 +71,13 @@ def test_examples1(self):
result = merge_asof(left, right, on="a")
tm.assert_frame_equal(result, expected)

def test_examples2(self):
def test_examples2(self, unit):
"""doc-string examples"""
if unit == "s":
pytest.skip(
"This test is invalid for unit='s' because that would "
"round the trades['time']]"
)
trades = pd.DataFrame(
{
"time": to_datetime(
Expand All @@ -75,7 +88,7 @@ def test_examples2(self):
"20160525 13:30:00.048",
"20160525 13:30:00.048",
]
),
).astype(f"M8[{unit}]"),
"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
"price": [51.95, 51.95, 720.77, 720.92, 98.00],
"quantity": [75, 155, 100, 100, 100],
Expand All @@ -96,7 +109,7 @@ def test_examples2(self):
"20160525 13:30:00.072",
"20160525 13:30:00.075",
]
),
).astype(f"M8[{unit}]"),
"ticker": [
"GOOG",
"MSFT",
Expand Down Expand Up @@ -127,7 +140,7 @@ def test_examples2(self):
"20160525 13:30:00.048",
"20160525 13:30:00.048",
]
),
).astype(f"M8[{unit}]"),
"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
"price": [51.95, 51.95, 720.77, 720.92, 98.00],
"quantity": [75, 155, 100, 100, 100],
Expand Down Expand Up @@ -639,7 +652,7 @@ def test_tolerance_nearest(self):
result = merge_asof(left, right, on="a", direction="nearest", tolerance=1)
tm.assert_frame_equal(result, expected)

def test_tolerance_tz(self):
def test_tolerance_tz(self, unit):
# GH 14844
left = pd.DataFrame(
{
Expand All @@ -648,6 +661,7 @@ def test_tolerance_tz(self):
freq="D",
periods=5,
tz=pytz.timezone("UTC"),
unit=unit,
),
"value1": np.arange(5),
}
Expand All @@ -659,6 +673,7 @@ def test_tolerance_tz(self):
freq="D",
periods=5,
tz=pytz.timezone("UTC"),
unit=unit,
),
"value2": list("ABCDE"),
}
Expand All @@ -672,6 +687,7 @@ def test_tolerance_tz(self):
freq="D",
periods=5,
tz=pytz.timezone("UTC"),
unit=unit,
),
"value1": np.arange(5),
"value2": list("BCDEE"),
Expand Down Expand Up @@ -1314,22 +1330,27 @@ def test_by_mixed_tz_aware(self):
expected["value_y"] = np.array([np.nan], dtype=object)
tm.assert_frame_equal(result, expected)

def test_timedelta_tolerance_nearest(self):
def test_timedelta_tolerance_nearest(self, unit):
# GH 27642
if unit == "s":
pytest.skip(
"This test is invalid with unit='s' because that would "
"round left['time']"
)

left = pd.DataFrame(
list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])),
columns=["time", "left"],
)

left["time"] = pd.to_timedelta(left["time"], "ms")
left["time"] = pd.to_timedelta(left["time"], "ms").astype(f"m8[{unit}]")

right = pd.DataFrame(
list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])),
columns=["time", "right"],
)

right["time"] = pd.to_timedelta(right["time"], "ms")
right["time"] = pd.to_timedelta(right["time"], "ms").astype(f"m8[{unit}]")

expected = pd.DataFrame(
list(
Expand All @@ -1342,7 +1363,7 @@ def test_timedelta_tolerance_nearest(self):
columns=["time", "left", "right"],
)

expected["time"] = pd.to_timedelta(expected["time"], "ms")
expected["time"] = pd.to_timedelta(expected["time"], "ms").astype(f"m8[{unit}]")

result = merge_asof(
left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest"
Expand Down Expand Up @@ -1400,12 +1421,17 @@ def test_merge_index_column_tz(self):
)
tm.assert_frame_equal(result, expected)

def test_left_index_right_index_tolerance(self):
def test_left_index_right_index_tolerance(self, unit):
# https://github.com/pandas-dev/pandas/issues/35558
dr1 = pd.date_range(start="1/1/2020", end="1/20/2020", freq="2D") + Timedelta(
seconds=0.4
)
dr2 = pd.date_range(start="1/1/2020", end="2/1/2020")
if unit == "s":
pytest.skip(
"This test is invalid with unit='s' because that would round dr1"
)

dr1 = pd.date_range(
start="1/1/2020", end="1/20/2020", freq="2D", unit=unit
) + Timedelta(seconds=0.4).as_unit(unit)
dr2 = pd.date_range(start="1/1/2020", end="2/1/2020", unit=unit)

df1 = pd.DataFrame({"val1": "foo"}, index=pd.DatetimeIndex(dr1))
df2 = pd.DataFrame({"val2": "bar"}, index=pd.DatetimeIndex(dr2))
Expand Down