-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Add more overflow tests for timedelta64 operations #46854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 23 commits
f0b34b0
287ca88
992ca95
1ea03e6
2868200
eb1f61b
ac91c58
b8e4a51
4c72f1e
5ef0a48
aeef81c
438339d
e86d0df
bd62e08
1424dee
3369bbc
6c3b482
c14d0c8
f2e0ba4
3f89526
e03a902
6ca60f5
15ef834
d4d2160
13d198e
b37aa4d
cd59647
05f4137
7a6a8cf
7ce2082
24fbc09
d6069d3
2f24ec7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,18 @@ | ||
# Arithmetic tests for DataFrame/Series/Index/Array classes that should | ||
# behave identically. | ||
from contextlib import ( | ||
AbstractContextManager, | ||
nullcontext, | ||
) | ||
from datetime import ( | ||
datetime, | ||
timedelta, | ||
) | ||
from functools import partial | ||
from typing import ( | ||
Type, | ||
Union, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wish, that's a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. even if we do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh, I'd tried that and switching to pipes, and it was failing for type aliases like
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh, I'd tried that and switching to pipes, and it was failing for type aliases like
|
||
) | ||
|
||
import numpy as np | ||
import pytest | ||
|
@@ -31,12 +40,33 @@ | |
Int64Index, | ||
UInt64Index, | ||
) | ||
from pandas.core.arrays import ( | ||
DatetimeArray, | ||
TimedeltaArray, | ||
) | ||
from pandas.tests.arithmetic.common import ( | ||
assert_invalid_addsub_type, | ||
assert_invalid_comparison, | ||
get_upcast_box, | ||
) | ||
|
||
TD64_BOX_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series, DataFrame] | ||
TD64_TYPE = Union[Timedelta, TD64_BOX_TYPE] | ||
DT64_TYPE = Union[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame] | ||
|
||
|
||
TD64_OVERFLOW_MSG = "|".join( | ||
[ | ||
"int too big to convert", | ||
"Python int too large to convert to C long", | ||
"Overflow in int64 addition", | ||
] | ||
) | ||
|
||
|
||
does_not_raise = nullcontext | ||
td64_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG) | ||
|
||
|
||
def assert_dtype(obj, expected_dtype): | ||
""" | ||
|
@@ -59,6 +89,59 @@ def get_expected_name(box, names): | |
return exname | ||
|
||
|
||
def get_result_type( | ||
td64_type: Type[TD64_TYPE], | ||
dt64_type: Type[DT64_TYPE], | ||
) -> Type[DT64_TYPE]: | ||
""" | ||
Expected result when adding, subtracting timedelta64-valued box and | ||
datetime64-valued box or scalar. | ||
""" | ||
dt64 = tm.wrap_value(Timestamp.now(), dt64_type) | ||
td64 = tm.wrap_value(Timedelta(0), td64_type) | ||
return type(dt64 + td64) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so this is going to map Timedelta->Timestamp and TimedeltaArray->DatetimeArray but be an identity mapping for everything else? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might be misunderstanding the question, but think the gist of what you're suggesting is right (assuming Array stands for every box type). So:
Happy to update the docstring, or use a different pattern, if that's unclear. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My preference (mentioned #46854 (comment)) is to handle the Timedelta/Timestamp cases in the existing tests/scalar/... files, and the non-scalar tests here. Then this wrapping becomes unnecessary and you can just use box_with_array There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about scalar/box tests, e.g. |
||
|
||
|
||
@pytest.fixture( | ||
name="td64_type", | ||
params=[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame], | ||
scope="module", | ||
) | ||
def fixture_td64_type(request) -> Type[TD64_TYPE]: | ||
return request.param | ||
|
||
|
||
@pytest.fixture( | ||
name="dt64_type", | ||
params=[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame], | ||
scope="module", | ||
) | ||
def fixture_dt64_type(request) -> Type[DT64_TYPE]: | ||
return request.param | ||
|
||
|
||
@pytest.fixture(name="max_td64") | ||
def fixture_max_td64(box_with_array) -> TD64_BOX_TYPE: | ||
""" | ||
A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to | ||
Timestamp.max. | ||
""" | ||
return tm.wrap_value(Timedelta.max, box_with_array) | ||
|
||
|
||
@pytest.fixture( | ||
name="positive_td64", | ||
params=[Timedelta(1), Timedelta(1024), Timedelta.max], | ||
ids=["1ns", "1024ns", "td_max"], | ||
) | ||
def fixture_positive_td64(request, td64_type: Type[TD64_TYPE]) -> TD64_TYPE: | ||
""" | ||
A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame. | ||
""" | ||
value = request.param | ||
return tm.wrap_value(value, td64_type) | ||
|
||
|
||
# ------------------------------------------------------------------ | ||
# Timedelta64[ns] dtype Comparisons | ||
|
||
|
@@ -687,30 +770,6 @@ def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): | |
assert res[1] is NaT | ||
|
||
def test_tdi_add_overflow(self): | ||
# See GH#14068 | ||
# preliminary test scalar analogue of vectorized tests below | ||
# TODO: Make raised error message more informative and test | ||
with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): | ||
pd.to_timedelta(106580, "D") + Timestamp("2000") | ||
with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): | ||
Timestamp("2000") + pd.to_timedelta(106580, "D") | ||
|
||
_NaT = NaT.value + 1 | ||
msg = "Overflow in int64 addition" | ||
with pytest.raises(OverflowError, match=msg): | ||
pd.to_timedelta([106580], "D") + Timestamp("2000") | ||
with pytest.raises(OverflowError, match=msg): | ||
Timestamp("2000") + pd.to_timedelta([106580], "D") | ||
with pytest.raises(OverflowError, match=msg): | ||
pd.to_timedelta([_NaT]) - Timedelta("1 days") | ||
with pytest.raises(OverflowError, match=msg): | ||
pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days") | ||
with pytest.raises(OverflowError, match=msg): | ||
( | ||
pd.to_timedelta([_NaT, "5 days", "1 hours"]) | ||
- pd.to_timedelta(["7 seconds", _NaT, "4 hours"]) | ||
) | ||
|
||
# These should not overflow! | ||
exp = TimedeltaIndex([NaT]) | ||
result = pd.to_timedelta([NaT]) - Timedelta("1 days") | ||
|
@@ -2089,18 +2148,104 @@ def test_td64arr_pow_invalid(self, scalar_td, box_with_array): | |
td1**scalar_td | ||
|
||
|
||
def test_add_timestamp_to_timedelta(): | ||
# GH: 35897 | ||
timestamp = Timestamp("2021-01-01") | ||
result = timestamp + timedelta_range("0s", "1s", periods=31) | ||
expected = DatetimeIndex( | ||
class TestAddSub: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this organization makes sense, I can open a follow up PR to migrate many of the existing tests into this and similar classes (and make them fully parameterized across all container types). |
||
""" | ||
Addition/subtraction between a timedelta64-valued | ||
ExtensionArrays/Indexes/Series/DataFrames, and a timedelta64 scalar or | ||
timedelta64-valued ExtensionArray/Index/Series/DataFrame. | ||
""" | ||
|
||
def test_add_raises_if_result_would_overflow( | ||
self, | ||
max_td64: TD64_TYPE, | ||
positive_td64: TD64_BOX_TYPE, | ||
): | ||
with td64_overflow_error(): | ||
max_td64 + positive_td64 | ||
|
||
with td64_overflow_error(): | ||
positive_td64 + max_td64 | ||
|
||
@pytest.mark.parametrize( | ||
["rval", "expected_exs"], | ||
[ | ||
timestamp | ||
+ ( | ||
pd.to_timedelta("0.033333333s") * i | ||
+ pd.to_timedelta("0.000000001s") * divmod(i, 3)[0] | ||
) | ||
for i in range(31) | ||
] | ||
(Timedelta(1), does_not_raise()), | ||
(Timedelta(2), td64_overflow_error()), | ||
(Timedelta.max, td64_overflow_error()), | ||
], | ||
) | ||
tm.assert_index_equal(result, expected) | ||
def test_sub_raises_if_result_would_overflow( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW, I tried rewriting this to keep the test function body constant, and mapping each input value to an appropriate xfail_does_not_raise = pytest.mark.xfail(reason="doesn't raise", strict=True)
xfail_raises_overflow_error = pytest.mark.xfail(
reason="raises wrong error",
raises=OverflowError,
strict=True,
)
# ...
@pytest.mark.parametrize(
"rval",
[
pytest.param(Timedelta(1), marks=xfail_does_not_raise),
pytest.param(Timedelta(2), marks=xfail_raises_overflow_error),
pytest.param(Timedelta.max, marks=xfail_raises_overflow_error),
],
)
def test_sub_raises_td64_specific_error_if_result_would_overflow(
self,
max_td64: TD64_BOX_TYPE,
rval: Timedelta,
td64_type: Type[TD64_TYPE],
):
rvalue = tm.wrap_value(rval, td64_type)
min_td64 = -1 * max_td64
with pytest.raises(OutOfBoundsTimedelta, match="too small"):
min_td64 - rvalue Unfortunately that slowed execution by a factor of ~40 (locally). Decorating the test with a single, catch-all |
||
self, | ||
max_td64: TD64_BOX_TYPE, | ||
rval: Timedelta, | ||
expected_exs: AbstractContextManager, | ||
td64_type: Type[TD64_TYPE], | ||
): | ||
rvalue = tm.wrap_value(rval, td64_type) | ||
min_td64 = -1 * max_td64 | ||
|
||
with expected_exs: | ||
min_td64 - rvalue | ||
|
||
with expected_exs: | ||
-1 * rvalue - max_td64 | ||
|
||
|
||
class TestNumericScalarMulDiv: | ||
""" | ||
Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame and a | ||
numeric scalar. | ||
""" | ||
|
||
@pytest.mark.xfail(reason="Not implemented") | ||
def test_scalar_mul_raises_if_result_would_overflow(self, max_td64: TD64_BOX_TYPE): | ||
with td64_overflow_error(): | ||
max_td64 * 1.01 | ||
|
||
with td64_overflow_error(): | ||
1.01 * max_td64 | ||
|
||
|
||
class TestAddSubDatetime64: | ||
""" | ||
Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame, and a | ||
datetime64 scalar or datetime64-valued ExtensionArray/Index/Series/DataFrame. | ||
""" | ||
|
||
def test_add(self, box_with_array, dt64_type: Type[DT64_TYPE]): | ||
# GH: 35897 | ||
dt64 = tm.wrap_value(Timestamp(2020, 1, 2), dt64_type) | ||
td64_box = tm.wrap_value(Timedelta(hours=3), box_with_array) | ||
|
||
expected_type = get_result_type(box_with_array, dt64_type) | ||
expected = tm.wrap_value(Timestamp(2020, 1, 2, 3), expected_type) | ||
result = dt64 + td64_box | ||
|
||
tm.assert_equal(result, expected) | ||
|
||
def test_add_dt64_raises_if_result_would_overflow( | ||
self, | ||
max_td64: TD64_BOX_TYPE, | ||
dt64_type: Type[DT64_TYPE], | ||
): | ||
max_dt64 = tm.wrap_value(Timestamp.max, dt64_type) | ||
ex = (OutOfBoundsDatetime, OverflowError) | ||
msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) | ||
|
||
with pytest.raises(ex, match=msg): | ||
max_td64 + max_dt64 | ||
|
||
with pytest.raises(ex, match=msg): | ||
max_dt64 + max_td64 | ||
|
||
def test_sub_td64_raises_if_result_would_overflow( | ||
self, | ||
max_td64: TD64_BOX_TYPE, | ||
dt64_type: Type[DT64_TYPE], | ||
): | ||
min_dt64 = tm.wrap_value(Timestamp.min, dt64_type) | ||
ex = (OutOfBoundsDatetime, OverflowError) | ||
msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) | ||
|
||
with pytest.raises(ex, match=msg): | ||
min_dt64 - max_td64 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The motivation for this was to simplify writing tests that should produce the same output for both scalar and container types, e.g. here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
are there any other thematically similar functions that could be grouped together in a pandas._testing.foo instead of
__init__
? we've kind of stagnated, but in principle there's a goal to get stuff out of this file