Skip to content

Add more overflow tests for timedelta64 operations #46854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 33 commits into from
Closed
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
f0b34b0
add tests for existing behavior
patrickmckenna Apr 22, 2022
287ca88
note spurious overflow for single elem td64 series
patrickmckenna Apr 23, 2022
992ca95
finer-grained testing for td64 sum overflow errors
patrickmckenna Apr 23, 2022
1ea03e6
styling, ex msg fixes
patrickmckenna Apr 24, 2022
2868200
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna Apr 24, 2022
eb1f61b
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna Apr 24, 2022
ac91c58
consolidate, parameterize td64 addition overflow tests
patrickmckenna Apr 25, 2022
b8e4a51
add scalar multiplication tests
patrickmckenna Apr 25, 2022
4c72f1e
add tests for scalar multiplication
patrickmckenna Apr 25, 2022
5ef0a48
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna Apr 25, 2022
aeef81c
mypy, win38 fixes
patrickmckenna Apr 25, 2022
438339d
use box_expected where possible
patrickmckenna Apr 25, 2022
e86d0df
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna Apr 25, 2022
bd62e08
consolidate all new, some old td64 overflow tests in new module
patrickmckenna Apr 26, 2022
1424dee
remove hypothesis overuse
patrickmckenna Apr 26, 2022
3369bbc
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna Apr 27, 2022
6c3b482
address PR code style feedback
patrickmckenna Apr 27, 2022
c14d0c8
put new tests back in pre-existing modules
patrickmckenna Apr 27, 2022
f2e0ba4
dedupe test setup helpers
patrickmckenna Apr 27, 2022
3f89526
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna Apr 27, 2022
e03a902
obey the linting gods
patrickmckenna Apr 28, 2022
6ca60f5
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna Apr 28, 2022
15ef834
adjust for platform/env-specific overflow behavior
patrickmckenna Apr 28, 2022
d4d2160
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna May 3, 2022
13d198e
use newer type hint syntax
patrickmckenna May 3, 2022
b37aa4d
update tests to reflect recent changes
patrickmckenna May 3, 2022
cd59647
remove scalar-box tests, use existing fixtures
patrickmckenna May 3, 2022
05f4137
DRY up some td64 tests
patrickmckenna May 3, 2022
7a6a8cf
platform-specific fixes
patrickmckenna May 3, 2022
7ce2082
Merge branch 'main' into td64-sums
patrickmckenna May 11, 2022
24fbc09
Merge branch 'main' into td64-sums
patrickmckenna May 12, 2022
d6069d3
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna May 13, 2022
2f24ec7
Merge remote-tracking branch 'upstream/main' into td64-sums
patrickmckenna May 15, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import collections
from collections import abc
from datetime import datetime
from decimal import Decimal
from functools import wraps
from inspect import isclass
import operator
import os
import re
Expand Down Expand Up @@ -279,6 +281,28 @@ def box_expected(expected, box_cls, transpose=True):
return expected


def wrap_value(value, cls, transpose=False):
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The motivation for this was to simplify writing tests that should produce the same output for both scalar and container types, e.g. here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are there any other thematically similar functions that could be grouped together in a pandas._testing.foo instead of __init__? we've kind of stagnated, but in principle there's a goal to get stuff out of this file

"""
If cls is a scalar type, return value as an instance of it, otherwise return value
wrapped in the box type indicated by cls.

Designed to play nicely with box_expected (and the box_with_array fixture).
"""
if isclass(cls) and not issubclass(cls, pd.core.arraylike.OpsMixin):
return cls(value)

if cls in (np.array, np.ndarray):
pass
elif cls is pd.array or issubclass(cls, ExtensionArray):
cls = pd.array
elif issubclass(cls, Index):
cls = Index

if not isinstance(value, (abc.Sequence, pd.core.arraylike.OpsMixin, np.ndarray)):
value = [value]
return box_expected(value, cls, transpose)


def to_array(obj):
"""
Similar to pd.array, but does not cast numpy dtypes to nullable dtypes.
Expand Down
219 changes: 182 additions & 37 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
from contextlib import (
AbstractContextManager,
nullcontext,
)
from datetime import (
datetime,
timedelta,
)
from functools import partial
from typing import (
Type,
Union,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can't use type and |?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish, that's a 3.10 feature.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

even if we do from __future__ import annotations? im sure we use pipes elsewhere

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, I'd tried that and switching to pipes, and it was failing for type aliases like TD64_BOX_TYPE = TimedeltaArray | TimedeltaIndex | Series | DataFrame. But TIL: pipes will work for function annotations (with that import).

type does work. I'll switch to that, and replace the aliases with pipes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, I'd tried that and switching to pipes, and it was failing for type aliases like TD64_BOX_TYPE = TimedeltaArray | TimedeltaIndex | Series | DataFrame. But TIL: pipes will work for function annotations (with that import).

type does work. I'll switch to that, and replace the aliases with pipes.

)

import numpy as np
import pytest
Expand Down Expand Up @@ -31,12 +40,33 @@
Int64Index,
UInt64Index,
)
from pandas.core.arrays import (
DatetimeArray,
TimedeltaArray,
)
from pandas.tests.arithmetic.common import (
assert_invalid_addsub_type,
assert_invalid_comparison,
get_upcast_box,
)

TD64_BOX_TYPE = Union[TimedeltaArray, TimedeltaIndex, Series, DataFrame]
TD64_TYPE = Union[Timedelta, TD64_BOX_TYPE]
DT64_TYPE = Union[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame]


TD64_OVERFLOW_MSG = "|".join(
[
"int too big to convert",
"Python int too large to convert to C long",
"Overflow in int64 addition",
]
)


does_not_raise = nullcontext
td64_overflow_error = partial(pytest.raises, OverflowError, match=TD64_OVERFLOW_MSG)


def assert_dtype(obj, expected_dtype):
"""
Expand All @@ -59,6 +89,59 @@ def get_expected_name(box, names):
return exname


def get_result_type(
td64_type: Type[TD64_TYPE],
dt64_type: Type[DT64_TYPE],
) -> Type[DT64_TYPE]:
"""
Expected result when adding, subtracting timedelta64-valued box and
datetime64-valued box or scalar.
"""
dt64 = tm.wrap_value(Timestamp.now(), dt64_type)
td64 = tm.wrap_value(Timedelta(0), td64_type)
return type(dt64 + td64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this is going to map Timedelta->Timestamp and TimedeltaArray->DatetimeArray but be an identity mapping for everything else?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might be misunderstanding the question, but think the gist of what you're suggesting is right (assuming Array stands for every box type). So:

Timedelta + Timestamp -> Timestamp
Timedelta + DatetimeArray -> DatetimeArray
Timedelta + DatetimeIndex -> DatetimeIndex
Timedelta + Series -> Series
Timedelta + DataFrame -> DataFrame
TimedeltaArray + Timestamp -> DatetimeArray
TimedeltaArray + DatetimeArray -> DatetimeArray
...
TimedeltaIndex + Timestamp -> DatetimeIndex
TimedeltaIndex + DatetimeArray -> DatetimeIndex
...

Happy to update the docstring, or use a different pattern, if that's unclear.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My preference (mentioned #46854 (comment)) is to handle the Timedelta/Timestamp cases in the existing tests/scalar/... files, and the non-scalar tests here. Then this wrapping becomes unnecessary and you can just use box_with_array

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about scalar/box tests, e.g. Timedelta + DatetimeIndex, where should those live?



@pytest.fixture(
name="td64_type",
params=[Timedelta, TimedeltaArray, TimedeltaIndex, Series, DataFrame],
scope="module",
)
def fixture_td64_type(request) -> Type[TD64_TYPE]:
return request.param


@pytest.fixture(
name="dt64_type",
params=[Timestamp, DatetimeArray, DatetimeIndex, Series, DataFrame],
scope="module",
)
def fixture_dt64_type(request) -> Type[DT64_TYPE]:
return request.param


@pytest.fixture(name="max_td64")
def fixture_max_td64(box_with_array) -> TD64_BOX_TYPE:
"""
A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to
Timestamp.max.
"""
return tm.wrap_value(Timedelta.max, box_with_array)


@pytest.fixture(
name="positive_td64",
params=[Timedelta(1), Timedelta(1024), Timedelta.max],
ids=["1ns", "1024ns", "td_max"],
)
def fixture_positive_td64(request, td64_type: Type[TD64_TYPE]) -> TD64_TYPE:
"""
A scalar, 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame.
"""
value = request.param
return tm.wrap_value(value, td64_type)


# ------------------------------------------------------------------
# Timedelta64[ns] dtype Comparisons

Expand Down Expand Up @@ -687,30 +770,6 @@ def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts):
assert res[1] is NaT

def test_tdi_add_overflow(self):
# See GH#14068
# preliminary test scalar analogue of vectorized tests below
# TODO: Make raised error message more informative and test
with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"):
pd.to_timedelta(106580, "D") + Timestamp("2000")
with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"):
Timestamp("2000") + pd.to_timedelta(106580, "D")

_NaT = NaT.value + 1
msg = "Overflow in int64 addition"
with pytest.raises(OverflowError, match=msg):
pd.to_timedelta([106580], "D") + Timestamp("2000")
with pytest.raises(OverflowError, match=msg):
Timestamp("2000") + pd.to_timedelta([106580], "D")
with pytest.raises(OverflowError, match=msg):
pd.to_timedelta([_NaT]) - Timedelta("1 days")
with pytest.raises(OverflowError, match=msg):
pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days")
with pytest.raises(OverflowError, match=msg):
(
pd.to_timedelta([_NaT, "5 days", "1 hours"])
- pd.to_timedelta(["7 seconds", _NaT, "4 hours"])
)

# These should not overflow!
exp = TimedeltaIndex([NaT])
result = pd.to_timedelta([NaT]) - Timedelta("1 days")
Expand Down Expand Up @@ -2089,18 +2148,104 @@ def test_td64arr_pow_invalid(self, scalar_td, box_with_array):
td1**scalar_td


def test_add_timestamp_to_timedelta():
# GH: 35897
timestamp = Timestamp("2021-01-01")
result = timestamp + timedelta_range("0s", "1s", periods=31)
expected = DatetimeIndex(
class TestAddSub:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this organization makes sense, I can open a follow up PR to migrate many of the existing tests into this and similar classes (and make them fully parameterized across all container types).

"""
Addition/subtraction between a timedelta64-valued
ExtensionArrays/Indexes/Series/DataFrames, and a timedelta64 scalar or
timedelta64-valued ExtensionArray/Index/Series/DataFrame.
"""

def test_add_raises_if_result_would_overflow(
self,
max_td64: TD64_TYPE,
positive_td64: TD64_BOX_TYPE,
):
with td64_overflow_error():
max_td64 + positive_td64

with td64_overflow_error():
positive_td64 + max_td64

@pytest.mark.parametrize(
["rval", "expected_exs"],
[
timestamp
+ (
pd.to_timedelta("0.033333333s") * i
+ pd.to_timedelta("0.000000001s") * divmod(i, 3)[0]
)
for i in range(31)
]
(Timedelta(1), does_not_raise()),
(Timedelta(2), td64_overflow_error()),
(Timedelta.max, td64_overflow_error()),
],
)
tm.assert_index_equal(result, expected)
def test_sub_raises_if_result_would_overflow(
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I tried rewriting this to keep the test function body constant, and mapping each input value to an appropriate xfail marker:

xfail_does_not_raise = pytest.mark.xfail(reason="doesn't raise", strict=True)
xfail_raises_overflow_error = pytest.mark.xfail(
    reason="raises wrong error",
    raises=OverflowError,
    strict=True,
)
# ...
    @pytest.mark.parametrize(
        "rval",
        [
            pytest.param(Timedelta(1), marks=xfail_does_not_raise),
            pytest.param(Timedelta(2), marks=xfail_raises_overflow_error),
            pytest.param(Timedelta.max, marks=xfail_raises_overflow_error),
        ],
    )    
    def test_sub_raises_td64_specific_error_if_result_would_overflow(
        self,
        max_td64: TD64_BOX_TYPE,
        rval: Timedelta,
        td64_type: Type[TD64_TYPE],
    ):
        rvalue = tm.wrap_value(rval, td64_type)
        min_td64 = -1 * max_td64

        with pytest.raises(OutOfBoundsTimedelta, match="too small"):
            min_td64 - rvalue

Unfortunately that slowed execution by a factor of ~40 (locally). Decorating the test with a single, catch-all xfail marker led to the same results. Passing run=False did help, but it was still ~5x slower.

self,
max_td64: TD64_BOX_TYPE,
rval: Timedelta,
expected_exs: AbstractContextManager,
td64_type: Type[TD64_TYPE],
):
rvalue = tm.wrap_value(rval, td64_type)
min_td64 = -1 * max_td64

with expected_exs:
min_td64 - rvalue

with expected_exs:
-1 * rvalue - max_td64


class TestNumericScalarMulDiv:
"""
Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame and a
numeric scalar.
"""

@pytest.mark.xfail(reason="Not implemented")
def test_scalar_mul_raises_if_result_would_overflow(self, max_td64: TD64_BOX_TYPE):
with td64_overflow_error():
max_td64 * 1.01

with td64_overflow_error():
1.01 * max_td64


class TestAddSubDatetime64:
"""
Operations on timedelta64-valued ExtensionArray/Index/Series/DataFrame, and a
datetime64 scalar or datetime64-valued ExtensionArray/Index/Series/DataFrame.
"""

def test_add(self, box_with_array, dt64_type: Type[DT64_TYPE]):
# GH: 35897
dt64 = tm.wrap_value(Timestamp(2020, 1, 2), dt64_type)
td64_box = tm.wrap_value(Timedelta(hours=3), box_with_array)

expected_type = get_result_type(box_with_array, dt64_type)
expected = tm.wrap_value(Timestamp(2020, 1, 2, 3), expected_type)
result = dt64 + td64_box

tm.assert_equal(result, expected)

def test_add_dt64_raises_if_result_would_overflow(
self,
max_td64: TD64_BOX_TYPE,
dt64_type: Type[DT64_TYPE],
):
max_dt64 = tm.wrap_value(Timestamp.max, dt64_type)
ex = (OutOfBoundsDatetime, OverflowError)
msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"])

with pytest.raises(ex, match=msg):
max_td64 + max_dt64

with pytest.raises(ex, match=msg):
max_dt64 + max_td64

def test_sub_td64_raises_if_result_would_overflow(
self,
max_td64: TD64_BOX_TYPE,
dt64_type: Type[DT64_TYPE],
):
min_dt64 = tm.wrap_value(Timestamp.min, dt64_type)
ex = (OutOfBoundsDatetime, OverflowError)
msg = "|".join([TD64_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"])

with pytest.raises(ex, match=msg):
min_dt64 - max_td64
Loading