Skip to content

Commit 5120720

Browse files
authored
BUG: merge with non-nano (#50835)
1 parent 08a7a9e commit 5120720

File tree

3 files changed

+59
-18
lines changed

3 files changed

+59
-18
lines changed

pandas/core/arrays/timedeltas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def _unbox_scalar(self, value) -> np.timedelta64:
316316
raise ValueError("'value' should be a Timedelta.")
317317
self._check_compatible_with(value)
318318
if value is NaT:
319-
return np.timedelta64(value.value, "ns")
319+
return np.timedelta64(value.value, self.unit)
320320
else:
321321
return value.as_unit(self.unit).asm8
322322

pandas/core/reshape/merge.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,10 @@
8989
)
9090
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
9191
import pandas.core.common as com
92-
from pandas.core.construction import extract_array
92+
from pandas.core.construction import (
93+
ensure_wrapped_if_datetimelike,
94+
extract_array,
95+
)
9396
from pandas.core.frame import _merge_doc
9497
from pandas.core.indexes.api import default_index
9598
from pandas.core.sorting import is_int64_overflow_possible
@@ -2109,12 +2112,24 @@ def injection(obj):
21092112

21102113
# initial type conversion as needed
21112114
if needs_i8_conversion(left_values):
2112-
left_values = left_values.view("i8")
2113-
right_values = right_values.view("i8")
21142115
if tolerance is not None:
21152116
tolerance = Timedelta(tolerance)
2117+
2118+
# TODO: we have no test cases with PeriodDtype here; probably
2119+
# need to adjust tolerance for that case.
2120+
if left_values.dtype.kind in ["m", "M"]:
2121+
# Make sure the i8 representation for tolerance
2122+
# matches that for left_values/right_values.
2123+
lvs = ensure_wrapped_if_datetimelike(left_values)
2124+
tolerance = tolerance.as_unit(lvs.unit)
2125+
21162126
tolerance = tolerance.value
21172127

2128+
# TODO: require left_values.dtype == right_values.dtype, or at least
2129+
# comparable for e.g. dt64tz
2130+
left_values = left_values.view("i8")
2131+
right_values = right_values.view("i8")
2132+
21182133
# a "by" parameter requires special handling
21192134
if self.left_by is not None:
21202135
# remove 'on' parameter from values if one existed

pandas/tests/reshape/merge/test_merge_asof.py

+40-14
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@
1616
from pandas.core.reshape.merge import MergeError
1717

1818

19+
@pytest.fixture(params=["s", "ms", "us", "ns"])
20+
def unit(request):
21+
"""
22+
Resolution for datetimelike dtypes.
23+
"""
24+
return request.param
25+
26+
1927
class TestAsOfMerge:
2028
def read_data(self, datapath, name, dedupe=False):
2129
path = datapath("reshape", "merge", "data", name)
@@ -63,8 +71,13 @@ def test_examples1(self):
6371
result = merge_asof(left, right, on="a")
6472
tm.assert_frame_equal(result, expected)
6573

66-
def test_examples2(self):
74+
def test_examples2(self, unit):
6775
"""doc-string examples"""
76+
if unit == "s":
77+
pytest.skip(
78+
"This test is invalid for unit='s' because that would "
79+
"round the trades['time']"
80+
)
6881
trades = pd.DataFrame(
6982
{
7083
"time": to_datetime(
@@ -75,7 +88,7 @@ def test_examples2(self):
7588
"20160525 13:30:00.048",
7689
"20160525 13:30:00.048",
7790
]
78-
),
91+
).astype(f"M8[{unit}]"),
7992
"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
8093
"price": [51.95, 51.95, 720.77, 720.92, 98.00],
8194
"quantity": [75, 155, 100, 100, 100],
@@ -96,7 +109,7 @@ def test_examples2(self):
96109
"20160525 13:30:00.072",
97110
"20160525 13:30:00.075",
98111
]
99-
),
112+
).astype(f"M8[{unit}]"),
100113
"ticker": [
101114
"GOOG",
102115
"MSFT",
@@ -127,7 +140,7 @@ def test_examples2(self):
127140
"20160525 13:30:00.048",
128141
"20160525 13:30:00.048",
129142
]
130-
),
143+
).astype(f"M8[{unit}]"),
131144
"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
132145
"price": [51.95, 51.95, 720.77, 720.92, 98.00],
133146
"quantity": [75, 155, 100, 100, 100],
@@ -639,7 +652,7 @@ def test_tolerance_nearest(self):
639652
result = merge_asof(left, right, on="a", direction="nearest", tolerance=1)
640653
tm.assert_frame_equal(result, expected)
641654

642-
def test_tolerance_tz(self):
655+
def test_tolerance_tz(self, unit):
643656
# GH 14844
644657
left = pd.DataFrame(
645658
{
@@ -648,6 +661,7 @@ def test_tolerance_tz(self):
648661
freq="D",
649662
periods=5,
650663
tz=pytz.timezone("UTC"),
664+
unit=unit,
651665
),
652666
"value1": np.arange(5),
653667
}
@@ -659,6 +673,7 @@ def test_tolerance_tz(self):
659673
freq="D",
660674
periods=5,
661675
tz=pytz.timezone("UTC"),
676+
unit=unit,
662677
),
663678
"value2": list("ABCDE"),
664679
}
@@ -672,6 +687,7 @@ def test_tolerance_tz(self):
672687
freq="D",
673688
periods=5,
674689
tz=pytz.timezone("UTC"),
690+
unit=unit,
675691
),
676692
"value1": np.arange(5),
677693
"value2": list("BCDEE"),
@@ -1314,22 +1330,27 @@ def test_by_mixed_tz_aware(self):
13141330
expected["value_y"] = np.array([np.nan], dtype=object)
13151331
tm.assert_frame_equal(result, expected)
13161332

1317-
def test_timedelta_tolerance_nearest(self):
1333+
def test_timedelta_tolerance_nearest(self, unit):
13181334
# GH 27642
1335+
if unit == "s":
1336+
pytest.skip(
1337+
"This test is invalid with unit='s' because that would "
1338+
"round left['time']"
1339+
)
13191340

13201341
left = pd.DataFrame(
13211342
list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])),
13221343
columns=["time", "left"],
13231344
)
13241345

1325-
left["time"] = pd.to_timedelta(left["time"], "ms")
1346+
left["time"] = pd.to_timedelta(left["time"], "ms").astype(f"m8[{unit}]")
13261347

13271348
right = pd.DataFrame(
13281349
list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])),
13291350
columns=["time", "right"],
13301351
)
13311352

1332-
right["time"] = pd.to_timedelta(right["time"], "ms")
1353+
right["time"] = pd.to_timedelta(right["time"], "ms").astype(f"m8[{unit}]")
13331354

13341355
expected = pd.DataFrame(
13351356
list(
@@ -1342,7 +1363,7 @@ def test_timedelta_tolerance_nearest(self):
13421363
columns=["time", "left", "right"],
13431364
)
13441365

1345-
expected["time"] = pd.to_timedelta(expected["time"], "ms")
1366+
expected["time"] = pd.to_timedelta(expected["time"], "ms").astype(f"m8[{unit}]")
13461367

13471368
result = merge_asof(
13481369
left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest"
@@ -1400,12 +1421,17 @@ def test_merge_index_column_tz(self):
14001421
)
14011422
tm.assert_frame_equal(result, expected)
14021423

1403-
def test_left_index_right_index_tolerance(self):
1424+
def test_left_index_right_index_tolerance(self, unit):
14041425
# https://github.com/pandas-dev/pandas/issues/35558
1405-
dr1 = pd.date_range(start="1/1/2020", end="1/20/2020", freq="2D") + Timedelta(
1406-
seconds=0.4
1407-
)
1408-
dr2 = pd.date_range(start="1/1/2020", end="2/1/2020")
1426+
if unit == "s":
1427+
pytest.skip(
1428+
"This test is invalid with unit='s' because that would round dr1"
1429+
)
1430+
1431+
dr1 = pd.date_range(
1432+
start="1/1/2020", end="1/20/2020", freq="2D", unit=unit
1433+
) + Timedelta(seconds=0.4).as_unit(unit)
1434+
dr2 = pd.date_range(start="1/1/2020", end="2/1/2020", unit=unit)
14091435

14101436
df1 = pd.DataFrame({"val1": "foo"}, index=pd.DatetimeIndex(dr1))
14111437
df2 = pd.DataFrame({"val2": "bar"}, index=pd.DatetimeIndex(dr2))

0 commit comments

Comments
 (0)