-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Add more overflow tests for timedelta64 operations #46854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
f0b34b0
287ca88
992ca95
1ea03e6
2868200
eb1f61b
ac91c58
b8e4a51
4c72f1e
5ef0a48
aeef81c
438339d
e86d0df
bd62e08
1424dee
3369bbc
6c3b482
c14d0c8
f2e0ba4
3f89526
e03a902
6ca60f5
15ef834
d4d2160
13d198e
b37aa4d
cd59647
05f4137
7a6a8cf
7ce2082
24fbc09
d6069d3
2f24ec7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,20 @@ | |
datetime, | ||
timedelta, | ||
) | ||
from itertools import ( | ||
chain, | ||
combinations_with_replacement, | ||
product, | ||
) | ||
from operator import attrgetter | ||
from typing import ( | ||
NamedTuple, | ||
Type, | ||
Union, | ||
) | ||
|
||
from hypothesis import given | ||
import hypothesis.strategies as st | ||
import numpy as np | ||
import pytest | ||
|
||
|
@@ -31,12 +44,92 @@ | |
Int64Index, | ||
UInt64Index, | ||
) | ||
from pandas.core.arrays import ( | ||
DatetimeArray, | ||
TimedeltaArray, | ||
) | ||
from pandas.tests.arithmetic.common import ( | ||
assert_invalid_addsub_type, | ||
assert_invalid_comparison, | ||
get_upcast_box, | ||
) | ||
|
||
timedelta_types = (Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most of the newly-added overflow tests are parameterized over There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that assumption is accurate id prefer to keep the Timedelta scalar tests in tests.scalar.timedelta, then use the box_with_array fixture for the non-scalar tests. part of the virtue of this is that when writing the non-scalar tests, we assume that the scalar behavior is correct and tested. |
||
timestamp_types = (Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame) | ||
containers = slice(1, None) | ||
patrickmckenna marked this conversation as resolved.
Show resolved
Hide resolved
|
||
get_item_names = lambda t: "-".join(map(attrgetter("__name__"), t)) | ||
|
||
|
||
class BinaryOpTypes(NamedTuple): | ||
""" | ||
The expected operand and result types for a binary operation. | ||
""" | ||
|
||
left: Type | ||
right: Type | ||
result: Type | ||
|
||
def __str__(self) -> str: | ||
return get_item_names(self) | ||
|
||
def __repr__(self) -> str: | ||
return f"BinaryOpTypes({self})" | ||
|
||
|
||
positive_tds = st.integers(min_value=1, max_value=Timedelta.max.value).map(Timedelta) | ||
|
||
xfail_no_overflow_check = pytest.mark.xfail(reason="No overflow check") | ||
|
||
|
||
@pytest.fixture( | ||
name="add_sub_types", | ||
scope="module", | ||
params=tuple(combinations_with_replacement(timedelta_types, 2)), | ||
ids=get_item_names, | ||
) | ||
def fixture_add_sub_types(request) -> BinaryOpTypes: | ||
""" | ||
Expected types when adding, subtracting Timedeltas. | ||
""" | ||
return_type = max(request.param, key=lambda t: timedelta_types.index(t)) | ||
return BinaryOpTypes(request.param[0], request.param[1], return_type) | ||
|
||
|
||
@pytest.fixture( | ||
name="ts_add_sub_types", | ||
scope="module", | ||
params=tuple(product(timedelta_types, timestamp_types)), | ||
ids=get_item_names, | ||
) | ||
def fixture_ts_add_sub_types(request) -> BinaryOpTypes: | ||
""" | ||
Expected types when adding, subtracting Timedeltas and Timestamps. | ||
""" | ||
type_hierarchy = { | ||
name: i | ||
for i, name in chain(enumerate(timedelta_types), enumerate(timestamp_types)) | ||
} | ||
return_type = timestamp_types[max(type_hierarchy[t] for t in request.param)] | ||
|
||
return BinaryOpTypes(request.param[0], request.param[1], return_type) | ||
|
||
|
||
def wrap_value(value: Union[Timestamp, Timedelta], type_): | ||
""" | ||
Return value wrapped in a container of given type_, or as-is if type_ is a scalar. | ||
""" | ||
if issubclass(type_, (Timedelta, Timestamp)): | ||
return type_(value) | ||
|
||
if issubclass(type_, pd.core.arrays.ExtensionArray): | ||
box_cls = pd.array | ||
elif issubclass(type_, pd.Index): | ||
box_cls = pd.Index | ||
else: | ||
box_cls = type_ | ||
|
||
return type_(tm.box_expected([value], box_cls)) | ||
|
||
|
||
def assert_dtype(obj, expected_dtype): | ||
""" | ||
|
@@ -275,6 +368,123 @@ def test_comparisons_coverage(self): | |
# Timedelta64[ns] dtype Arithmetic Operations | ||
|
||
|
||
@given(positive_td=positive_tds) | ||
def test_add_raises_expected_error_if_result_would_overflow( | ||
add_sub_types: BinaryOpTypes, | ||
positive_td: Timedelta, | ||
): | ||
left = wrap_value(Timedelta.max, add_sub_types.left) | ||
right = wrap_value(positive_td, add_sub_types.right) | ||
|
||
if add_sub_types.result is Timedelta: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be raising a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably, yes |
||
msg = "|".join( | ||
[ | ||
"int too big to convert", | ||
"Python int too large to convert to C long", | ||
] | ||
) | ||
else: | ||
msg = "Overflow in int64 addition" | ||
|
||
with pytest.raises(OverflowError, match=msg): | ||
left + right | ||
|
||
with pytest.raises(OverflowError, match=msg): | ||
right + left | ||
|
||
|
||
@xfail_no_overflow_check | ||
@given(positive_td=positive_tds) | ||
def test_sub_raises_expected_error_if_result_would_overflow( | ||
add_sub_types: BinaryOpTypes, | ||
positive_td: Timedelta, | ||
): | ||
left = wrap_value(Timedelta.min, add_sub_types.left) | ||
right = wrap_value(positive_td, add_sub_types.right) | ||
|
||
msg = "Overflow in int64 addition" | ||
with pytest.raises(OverflowError, match=msg): | ||
left - right | ||
|
||
with pytest.raises(OverflowError, match=msg): | ||
(-1 * right) - abs(left) | ||
|
||
|
||
@given(td_value=positive_tds) | ||
def test_add_timestamp_raises_expected_error_if_result_would_overflow( | ||
ts_add_sub_types: BinaryOpTypes, | ||
td_value: Timedelta, | ||
): | ||
left = wrap_value(td_value, ts_add_sub_types.left) | ||
right = wrap_value(Timestamp.max, ts_add_sub_types.right) | ||
|
||
ex = (OutOfBoundsDatetime, OverflowError) | ||
msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) | ||
|
||
with pytest.raises(ex, match=msg): | ||
left + right | ||
|
||
with pytest.raises(ex, match=msg): | ||
right + left | ||
|
||
|
||
@xfail_no_overflow_check | ||
@given(td_value=positive_tds) | ||
def test_sub_timestamp_raises_expected_error_if_result_would_overflow( | ||
ts_add_sub_types: BinaryOpTypes, | ||
td_value: Timedelta, | ||
): | ||
right = wrap_value(td_value, ts_add_sub_types[0]) | ||
left = wrap_value(Timestamp.min, ts_add_sub_types[1]) | ||
|
||
ex = (OutOfBoundsDatetime, OverflowError) | ||
msg = "|".join(["Out of bounds nanosecond timestamp", "Overflow in int64 addition"]) | ||
|
||
with pytest.raises(ex, match=msg): | ||
left - right | ||
|
||
|
||
@given(value=st.floats().filter(lambda f: abs(f) > 1)) | ||
def test_scalar_multiplication_raises_expected_error_if_result_would_overflow( | ||
value: float, | ||
): | ||
td = Timedelta.max | ||
|
||
msg = "|".join( | ||
[ | ||
"cannot convert float infinity to integer", | ||
"Python int too large to convert to C long", | ||
"int too big to convert", | ||
] | ||
) | ||
with pytest.raises(OverflowError, match=msg): | ||
td * value | ||
|
||
with pytest.raises(OverflowError, match=msg): | ||
value * td | ||
|
||
|
||
@xfail_no_overflow_check | ||
@given(value=st.floats().filter(lambda f: abs(f) > 1)) | ||
@pytest.mark.parametrize( | ||
argnames="td_type", | ||
argvalues=timedelta_types[containers], # type: ignore[arg-type] | ||
ids=attrgetter("__name__"), | ||
) | ||
def test_container_scalar_multiplication_raises_expected_error_if_result_would_overflow( | ||
value: float, | ||
td_type: Type, | ||
): | ||
td = wrap_value(Timedelta.max, td_type) | ||
|
||
msg = "Overflow in int64 addition" | ||
with pytest.raises(OverflowError, match=msg): | ||
td * value | ||
|
||
with pytest.raises(OverflowError, match=msg): | ||
value * td | ||
|
||
|
||
class TestTimedelta64ArithmeticUnsorted: | ||
# Tests moved from type-specific test files but not | ||
# yet sorted/parametrized/de-duplicated | ||
|
@@ -687,30 +897,6 @@ def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): | |
assert res[1] is NaT | ||
|
||
def test_tdi_add_overflow(self): | ||
# See GH#14068 | ||
# preliminary test scalar analogue of vectorized tests below | ||
# TODO: Make raised error message more informative and test | ||
with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): | ||
pd.to_timedelta(106580, "D") + Timestamp("2000") | ||
with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): | ||
Timestamp("2000") + pd.to_timedelta(106580, "D") | ||
|
||
_NaT = NaT.value + 1 | ||
msg = "Overflow in int64 addition" | ||
with pytest.raises(OverflowError, match=msg): | ||
pd.to_timedelta([106580], "D") + Timestamp("2000") | ||
with pytest.raises(OverflowError, match=msg): | ||
Timestamp("2000") + pd.to_timedelta([106580], "D") | ||
with pytest.raises(OverflowError, match=msg): | ||
pd.to_timedelta([_NaT]) - Timedelta("1 days") | ||
with pytest.raises(OverflowError, match=msg): | ||
pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days") | ||
with pytest.raises(OverflowError, match=msg): | ||
( | ||
pd.to_timedelta([_NaT, "5 days", "1 hours"]) | ||
- pd.to_timedelta(["7 seconds", _NaT, "4 hours"]) | ||
) | ||
|
||
# These should not overflow! | ||
exp = TimedeltaIndex([NaT]) | ||
result = pd.to_timedelta([NaT]) - Timedelta("1 days") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't use
type
and|
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wish, that's a
3.10
feature.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
even if we do
from __future__ import annotations
? im sure we use pipes elsewhereThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Huh, I'd tried that and switching to pipes, and it was failing for type aliases like
TD64_BOX_TYPE = TimedeltaArray | TimedeltaIndex | Series | DataFrame
. But TIL: pipes will work for function annotations (with thatimport
).type
does work. I'll switch to that, and replace the aliases with pipes.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Huh, I'd tried that and switching to pipes, and it was failing for type aliases like
TD64_BOX_TYPE = TimedeltaArray | TimedeltaIndex | Series | DataFrame
. But TIL: pipes will work for function annotations (with thatimport
).type
does work. I'll switch to that, and replace the aliases with pipes.