From aa47af99b1d7d22cb070d40a7808996facd429d6 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 3 May 2022 14:51:31 -0700 Subject: [PATCH 01/18] add more Timedelta tests --- .../scalar/timedelta/test_constructors.py | 490 ++++++++++++------ 1 file changed, 320 insertions(+), 170 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 7fc7bd3a5a74d..a8b0b86fa23a0 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -1,31 +1,78 @@ from datetime import timedelta -from itertools import product +from itertools import ( + chain, + zip_longest, +) +import re import numpy as np import pytest +from toolz.curried import keyfilter from pandas._libs.tslibs import OutOfBoundsTimedelta from pandas import ( + NA, + NaT, Timedelta, offsets, to_timedelta, ) +TD_KWARGS_UNITS = { + "weeks": ("w",), + "days": ("d", "day", "days"), + "hours": ("h", "hr", "hour", "hours"), + "minutes": ("m", "t", "min", "minute", "minutes"), + "seconds": ("s", "sec", "second", "seconds"), + "milliseconds": ("l", "ms", "milli", "millis", "millisecond", "milliseconds"), + "microseconds": ("u", "us", "µs", "micro", "micros", "microsecond", "microseconds"), + "nanoseconds": ("n", "ns", "nano", "nanos", "nanosecond", "nanoseconds"), +} +TD_MAX_PER_KWARG = { + "nanoseconds": Timedelta.max.value, + "microseconds": Timedelta.max.value // 1_000, + "milliseconds": Timedelta.max.value // 1_000_000, + "seconds": Timedelta.max.value // 1_000_000_000, + "minutes": Timedelta.max.value // (1_000_000_000 * 60), + "hours": Timedelta.max.value // (1_000_000_000 * 60 * 60), + "days": Timedelta.max.value // (1_000_000_000 * 60 * 60 * 24), + "weeks": Timedelta.max.value // (1_000_000_000 * 60 * 60 * 24 * 7), +} +TD_MIN_PER_KWARG = { + "nanoseconds": Timedelta.min.value, + "microseconds": Timedelta.min.value // 1_000, + "milliseconds": Timedelta.min.value // 1_000_000, 
+ "seconds": Timedelta.min.value // 1_000_000_000, + "minutes": Timedelta.min.value // (1_000_000_000 * 60), + "hours": Timedelta.min.value // (1_000_000_000 * 60 * 60), + "days": Timedelta.min.value // (1_000_000_000 * 60 * 60 * 24), + "weeks": Timedelta.min.value // (1_000_000_000 * 60 * 60 * 24 * 7), +} +TD_MAX_PER_UNIT = dict( + chain.from_iterable( + zip_longest(units, (TD_MAX_PER_KWARG[k],), fillvalue=TD_MAX_PER_KWARG[k]) + for k, units in TD_KWARGS_UNITS.items() + ) +) +TD_MIN_PER_UNIT = dict( + chain.from_iterable( + zip_longest(units, (TD_MIN_PER_KWARG[k],), fillvalue=TD_MIN_PER_KWARG[k]) + for k, units in TD_KWARGS_UNITS.items() + ) +) +TD_KWARGS_NP_TD64_UNITS = dict( + zip(TD_MAX_PER_KWARG, ("ns", "us", "ms", "s", "m", "h", "D", "W")) +) +NP_TD64_MAX_PER_UNIT = dict( + zip(("ns", "us", "ms", "s", "m", "h", "D", "W"), TD_MAX_PER_KWARG.values()) +) +NP_TD64_MIN_PER_UNIT = dict( + zip(("ns", "us", "ms", "s", "m", "h", "D", "W"), TD_MIN_PER_KWARG.values()) +) -def test_construct_from_td64_with_unit(): - # ignore the unit, as it may cause silently overflows leading to incorrect - # results, and in non-overflow cases is irrelevant GH#46827 - obj = np.timedelta64(123456789, "h") - - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): - Timedelta(obj, unit="ps") - - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): - Timedelta(obj, unit="ns") - with pytest.raises(OutOfBoundsTimedelta, match="123456789 hours"): - Timedelta(obj) +skip_ns = keyfilter(lambda k: not k.startswith("n")) def test_construction(): @@ -100,37 +147,6 @@ def test_construction(): days=10, hours=1, minutes=1, seconds=31, microseconds=3 ) - # Currently invalid as it has a - on the hh:mm:dd part - # (only allowed on the days) - msg = "only leading negative signs are allowed" - with pytest.raises(ValueError, match=msg): - Timedelta("-10 days -1 h 1.5m 1s 3us") - - # only leading neg signs are allowed - with pytest.raises(ValueError, match=msg): - Timedelta("10 days 
-1 h 1.5m 1s 3us") - - # no units specified - msg = "no units specified" - with pytest.raises(ValueError, match=msg): - Timedelta("3.1415") - - # invalid construction - msg = "cannot construct a Timedelta" - with pytest.raises(ValueError, match=msg): - Timedelta() - - msg = "unit abbreviation w/o a number" - with pytest.raises(ValueError, match=msg): - Timedelta("foo") - - msg = ( - "cannot construct a Timedelta from " - "the passed arguments, allowed keywords are " - ) - with pytest.raises(ValueError, match=msg): - Timedelta(day=10) - # floats expected = np.timedelta64(10, "s").astype("m8[ns]").view("i8") + np.timedelta64( 500, "ms" @@ -148,33 +164,30 @@ def test_construction(): assert result == expected assert to_timedelta(offsets.Hour(2)) == Timedelta("0 days, 02:00:00") - msg = "unit abbreviation w/o a number" - with pytest.raises(ValueError, match=msg): - Timedelta("foo bar") +@pytest.mark.parametrize("unit", ("ps", "ns")) +def test_from_np_td64_ignores_unit(unit: str): + """ + Ignore the unit, as it may cause silently overflows leading to incorrect results, + and in non-overflow cases is irrelevant GH#46827. 
+ """ + td64 = np.timedelta64(NP_TD64_MAX_PER_UNIT["h"], "h") + assert Timedelta(td64, unit=unit) == Timedelta(td64) + + with pytest.raises(OutOfBoundsTimedelta, match=f"{td64 * 2}"): + Timedelta(td64 * 2, unit=unit) + + +@pytest.mark.parametrize(("td_kwarg", "np_unit"), TD_KWARGS_NP_TD64_UNITS.items()) @pytest.mark.parametrize( - "item", - list( - { - "days": "D", - "seconds": "s", - "microseconds": "us", - "milliseconds": "ms", - "minutes": "m", - "hours": "h", - "weeks": "W", - }.items() - ), -) -@pytest.mark.parametrize( - "npdtype", [np.int64, np.int32, np.int16, np.float64, np.float32, np.float16] + "np_dtype", + (np.int64, np.int32, np.int16, np.float64, np.float32, np.float16), ) -def test_td_construction_with_np_dtypes(npdtype, item): +def test_td_construction_with_np_dtypes(np_dtype: type, td_kwarg: str, np_unit: str): # GH#8757: test construction with np dtypes - pykwarg, npkwarg = item - expected = np.timedelta64(1, npkwarg).astype("m8[ns]").view("i8") - assert Timedelta(**{pykwarg: npdtype(1)}).value == expected + expected_ns = np.timedelta64(1, np_unit).astype("m8[ns]").view("i8") + assert Timedelta(**{td_kwarg: np_dtype(1)}).value == expected_ns @pytest.mark.parametrize( @@ -203,58 +216,6 @@ def test_td_from_repr_roundtrip(val): assert Timedelta(td._repr_base()) == td -def test_overflow_on_construction(): - msg = "int too (large|big) to convert" - - # GH#3374 - value = Timedelta("1day").value * 20169940 - with pytest.raises(OverflowError, match=msg): - Timedelta(value) - - # xref GH#17637 - with pytest.raises(OverflowError, match=msg): - Timedelta(7 * 19999, unit="D") - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(timedelta(days=13 * 19999)) - - -@pytest.mark.parametrize( - "val, unit, name", - [ - (3508, "M", " months"), - (15251, "W", " weeks"), # 1 - (106752, "D", " days"), # change from previous: - (2562048, "h", " hours"), # 0 hours - (153722868, "m", " minutes"), # 13 minutes - (9223372037, "s", " seconds"), # 44 seconds - 
], -) -def test_construction_out_of_bounds_td64(val, unit, name): - # TODO: parametrize over units just above/below the implementation bounds - # once GH#38964 is resolved - - # Timedelta.max is just under 106752 days - td64 = np.timedelta64(val, unit) - assert td64.astype("m8[ns]").view("i8") < 0 # i.e. naive astype will be wrong - - msg = str(val) + name - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(td64) - - # But just back in bounds and we are OK - assert Timedelta(td64 - 1) == td64 - 1 - - td64 *= -1 - assert td64.astype("m8[ns]").view("i8") > 0 # i.e. naive astype will be wrong - - with pytest.raises(OutOfBoundsTimedelta, match="-" + msg): - Timedelta(td64) - - # But just back in bounds and we are OK - assert Timedelta(td64 + 1) == td64 + 1 - - @pytest.mark.parametrize( "fmt,exp", [ @@ -303,24 +264,6 @@ def test_iso_constructor(fmt, exp): assert Timedelta(fmt) == exp -@pytest.mark.parametrize( - "fmt", - [ - "PPPPPPPPPPPP", - "PDTHMS", - "P0DT999H999M999S", - "P1DT0H0M0.0000000000000S", - "P1DT0H0M0.S", - "P", - "-P", - ], -) -def test_iso_constructor_raises(fmt): - msg = f"Invalid ISO 8601 Duration format - {fmt}" - with pytest.raises(ValueError, match=msg): - Timedelta(fmt) - - @pytest.mark.parametrize( "constructed_td, conversion", [ @@ -348,47 +291,254 @@ def test_td_constructor_on_nanoseconds(constructed_td, conversion): assert constructed_td == Timedelta(conversion) -def test_td_constructor_value_error(): - msg = "Invalid type . Must be int or float." 
- with pytest.raises(TypeError, match=msg): - Timedelta(nanoseconds="abc") - - -def test_timedelta_constructor_identity(): - # Test for #30543 - expected = Timedelta(np.timedelta64(1, "s")) - result = Timedelta(expected) - assert result is expected - - @pytest.mark.parametrize( - "constructor, value, unit, expectation", + ("args", "kwargs"), [ - (Timedelta, "10s", "ms", (ValueError, "unit must not be specified")), - (to_timedelta, "10s", "ms", (ValueError, "unit must not be specified")), - (to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must not be specified")), + ((), {}), + (("ps",), {}), + (("ns",), {}), + (("ms",), {}), + ((), {"seconds": 3}), + (("ns",), {"minutes": 2}), ], ) -def test_string_with_unit(constructor, value, unit, expectation): - exp, match = expectation - with pytest.raises(exp, match=match): - _ = constructor(value, unit=unit) +def test_other_args_ignored_if_timedelta_value_passed(args: tuple, kwargs: dict): + original = Timedelta(1) + new = Timedelta(original, *args, **kwargs) + + assert new == original + if not any((args, kwargs)): + assert new is original @pytest.mark.parametrize( "value", - [ - "".join(elements) - for repetition in (1, 2) - for elements in product("+-, ", repeat=repetition) - ], + ( + None, + np.nan, + NaT, + pytest.param( + NA, + marks=pytest.mark.xfail( + reason="constructor fails", + raises=ValueError, + strict=True, + ), + ), + ), + ids=("None", "np.nan", "pd.NaT", "pd.NA"), ) -def test_string_without_numbers(value): - # GH39710 Timedelta input string with only symbols and no digits raises an error - msg = ( - "symbols w/o a number" - if value != "--" - else "only leading negative signs are allowed" +def test_returns_nat_for_most_na_values(value): + assert Timedelta(value) is NaT + + +class TestInvalidArgCombosFormats: + def test_raises_if_no_args_passed(self): + msg = re.escape( + "cannot construct a Timedelta without a value/unit or descriptive keywords " + "(days,seconds....)" + ) + + with 
pytest.raises(ValueError, match=msg): + Timedelta() + + @pytest.mark.parametrize("unit", ("years", "months", "day", "ps")) + def test_raises_for_invalid_kwarg(self, unit: str): + msg = re.escape( + "cannot construct a Timedelta from the passed arguments, allowed keywords " + "are [weeks, days, hours, minutes, seconds, milliseconds, " + "microseconds, nanoseconds]" + ) + + with pytest.raises(ValueError, match=msg): + Timedelta(**{unit: 1}) # type: ignore[arg-type] + + def test_raises_if_kwarg_has_str_value(self): + msg = "Invalid type . Must be int or float." + + with pytest.raises(TypeError, match=msg): + Timedelta(nanoseconds="1") + + @pytest.mark.parametrize( + ("constructor", "value", "unit", "msg"), + ( + (Timedelta, "10s", "ms", "the value is a str"), + (to_timedelta, "10s", "ms", "the input is/contains a str"), + (to_timedelta, ["1", "2", "3"], "s", "the input contains a str"), + ), + ids=("Timedelta", "to_timedelta-scalar", "to_timedelta-sequence"), + ) + def test_raises_if_both_str_value_and_unit_passed( + self, + constructor, + value, + unit, + msg, + ): + msg = "unit must not be specified if " + msg + + with pytest.raises(ValueError, match=msg): + constructor(value, unit=unit) + + @pytest.mark.parametrize( + "value", + [ + "PPPPPPPPPPPP", + "PDTHMS", + "P0DT999H999M999S", + "P1DT0H0M0.0000000000000S", + "P1DT0H0M0.S", + "P", + "-P", + ], + ) + def test_raises_for_invalid_iso_like_str_value(self, value): + msg = f"Invalid ISO 8601 Duration format - {value}" + + with pytest.raises(ValueError, match=msg): + Timedelta(value) + + def test_raises_if_str_value_contains_no_units(self): + msg = "no units specified" + + with pytest.raises(ValueError, match=msg): + Timedelta("3.1415") + + @pytest.mark.parametrize( + ("value", "msg"), + ( + ("us", "unit abbreviation w/o a number"), + ("seconds", "unit abbreviation w/o a number"), + ("garbage", "unit abbreviation w/o a number"), + # GH39710 Timedelta input string with only symbols and no digits raises + ("+", "symbols 
w/o a number"), + ("-", "symbols w/o a number"), + ), + ) + def test_raises_if_str_value_contains_no_numeric_component( + self, + value: str, + msg: str, + ): + with pytest.raises(ValueError, match=msg): + Timedelta(value) + + @pytest.mark.parametrize( + "value", + ( + "--", + # Currently invalid as it has a - on the hh:mm:dd part + # (only allowed on the days) + "-10 days -1 h 1.5m 1s 3us", + "10 days -1 h 1.5m 1s 3us", + ), + ) + def test_raises_for_str_value_with_minus_sign(self, value: str): + msg = "only leading negative signs are allowed" + with pytest.raises(ValueError, match=msg): + Timedelta(value) + + @pytest.mark.parametrize("unit", ["Y", "y", "M"]) + def test_raises_if_ambiguous_units_passed(self, unit: str): + msg = ( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." + ) + + with pytest.raises(ValueError, match=msg): + Timedelta(1, unit) + + +class TestOverflow: + + msg = "|".join( + ( + "Python int too large to convert to C long", + "int too big to convert", + *TD_KWARGS_UNITS.keys(), + ) ) - with pytest.raises(ValueError, match=msg): - Timedelta(value) + errors = (OverflowError, OutOfBoundsTimedelta) + + @pytest.mark.parametrize(("unit", "max_val"), TD_MAX_PER_UNIT.items()) + def test_int_plus_units_too_big(self, unit: str, max_val: int, request): + if unit == "w": + mark = pytest.mark.xfail( + reason="does not raise", + raises=pytest.fail.Exception, + strict=True, + ) + request.node.add_marker(mark) + + too_big = max_val + 1 + + with pytest.raises(self.errors, match=self.msg): + Timedelta(too_big, unit=unit) + + @pytest.mark.parametrize(("unit", "min_val"), skip_ns(TD_MIN_PER_UNIT).items()) + def test_int_plus_units_too_small(self, unit: str, min_val: int, request): + if unit == "w": + mark = pytest.mark.xfail( + reason="does not raise", + raises=pytest.fail.Exception, + strict=True, + ) + request.node.add_marker(mark) + + too_small = min_val - 1 + + with 
pytest.raises(self.errors, match=self.msg): + Timedelta(too_small, unit=unit) + + @pytest.mark.parametrize(("kwarg", "max_val"), TD_MAX_PER_KWARG.items()) + def test_kwarg_too_big(self, kwarg: str, max_val: int): + too_big = max_val + 1 + + with pytest.raises(self.errors, match=self.msg): + assert Timedelta(**{kwarg: too_big}) # type: ignore[arg-type] + + @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) + def test_kwarg_too_small(self, kwarg: str, min_val: int): + too_small = min_val - 1 + + with pytest.raises(self.errors, match=self.msg): + Timedelta(**{kwarg: too_small}) # type: ignore[arg-type] + + @pytest.mark.parametrize(("kwarg", "max_val"), skip_ns(TD_MAX_PER_KWARG).items()) + def test_from_timedelta_too_big(self, kwarg: str, max_val: int): + too_big = timedelta(**{kwarg: max_val + 1}) + + with pytest.raises(self.errors, match=self.msg): + Timedelta(too_big) + + @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) + def test_from_timedelta_too_small(self, kwarg: str, min_val: int): + too_small = timedelta(**{kwarg: min_val - 1}) + + with pytest.raises(self.errors, match=self.msg): + Timedelta(too_small) + + @pytest.mark.parametrize(("unit", "max_val"), skip_ns(NP_TD64_MAX_PER_UNIT).items()) + def test_from_np_td64_too_big(self, unit: str, max_val: int): + too_big = np.timedelta64(max_val + 1, unit) + + with pytest.raises(self.errors, match=self.msg): + Timedelta(too_big) + + @pytest.mark.parametrize(("unit", "min_val"), skip_ns(NP_TD64_MIN_PER_UNIT).items()) + def test_from_np_td64_too_small(self, unit: str, min_val: int): + too_small = np.timedelta64(min_val - 1, unit) + + with pytest.raises(self.errors, match=self.msg): + Timedelta(too_small) + + def test_too_small_by_1ns_returns_nat(self): + too_small = Timedelta.min.value - 1 + too_small_np_td = np.timedelta64(too_small) + + assert isinstance(too_small, int) + assert isinstance(too_small_np_td, np.timedelta64) + + assert Timedelta(too_small, 
"ns") is NaT + assert Timedelta(nanoseconds=too_small) is NaT + assert Timedelta(too_small_np_td) is NaT From 0957c2735d0724516b084f1c435c46d63c524cdc Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 3 May 2022 18:41:17 -0700 Subject: [PATCH 02/18] whoops, toolz is optional --- pandas/tests/scalar/timedelta/test_constructors.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index a8b0b86fa23a0..59608c736ee7a 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -7,7 +7,6 @@ import numpy as np import pytest -from toolz.curried import keyfilter from pandas._libs.tslibs import OutOfBoundsTimedelta @@ -72,7 +71,7 @@ ) -skip_ns = keyfilter(lambda k: not k.startswith("n")) +skip_ns = lambda d: {k: v for k, v in d.items() if not k.startswith("n")} def test_construction(): From 89b243140e75a8f65010cd22c1874ab3cff20364 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 4 May 2022 10:32:10 -0700 Subject: [PATCH 03/18] s/OverflowError/OutOfBoundsTimedelta where relevant This is a quick and dirty v0 approach. --- pandas/_libs/tslibs/timedeltas.pyi | 1 + pandas/_libs/tslibs/timedeltas.pyx | 193 ++++++++++-------- pandas/tests/scalar/timedelta/conftest.py | 18 ++ .../tests/scalar/timedelta/test_arithmetic.py | 9 +- .../scalar/timedelta/test_constructors.py | 65 +++--- .../tests/scalar/timedelta/test_timedelta.py | 11 +- 6 files changed, 168 insertions(+), 129 deletions(-) create mode 100644 pandas/tests/scalar/timedelta/conftest.py diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index a04104915cf1f..6dc9de817ad9a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -73,6 +73,7 @@ def array_to_timedelta64( ) -> np.ndarray: ... 
# np.ndarray[m8ns] def parse_timedelta_unit(unit: str | None) -> UnitChoices: ... def delta_to_nanoseconds(delta: np.timedelta64 | timedelta | Tick) -> int: ... +def calculate(op, left: int, right: int) -> int: ... class Timedelta(timedelta): min: ClassVar[Timedelta] diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 8f145d0d66acc..5f227fb02eedb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -136,6 +136,9 @@ cdef dict timedelta_abbrevs = { _no_input = object() +TIMEDELTA_MIN_NS = np.iinfo(np.int64).min + 1 +TIMEDELTA_MAX_NS = np.iinfo(np.int64).max + # ---------------------------------------------------------------------- # API @@ -217,7 +220,8 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: + delta.microseconds ) * 1000 except OverflowError as err: - raise OutOfBoundsTimedelta(*err.args) from err + msg = f"{delta} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from err raise TypeError(type(delta)) @@ -254,7 +258,8 @@ cdef object ensure_td64ns(object ts): # NB: cython#1381 this cannot be *= td64_value = td64_value * mult except OverflowError as err: - raise OutOfBoundsTimedelta(ts) from err + msg = f"{ts} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from err return np.timedelta64(td64_value, "ns") @@ -679,6 +684,18 @@ def _op_unary_method(func, name): return f +cpdef int64_t calculate(object op, int64_t a, int64_t b) except? -1: + """ + Calculate op(a, b) and return the result, or raise if the operation would overflow. 
+ """ + try: + with cython.overflowcheck(True): + return op(a, b) + except OverflowError as ex: + msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from ex + + def _binary_op_method_timedeltalike(op, name): # define a binary operation that only works if the other argument is # timedelta like or an array of timedeltalike @@ -723,13 +740,10 @@ def _binary_op_method_timedeltalike(op, name): if self._reso != other._reso: raise NotImplementedError - res = op(self.value, other.value) - if res == NPY_NAT: - # e.g. test_implementation_limits - # TODO: more generally could do an overflowcheck in op? + result = calculate(op, self.value, other.value) + if result == NPY_NAT: return NaT - - return _timedelta_from_value_and_reso(res, reso=self._reso) + return _timedelta_from_value_and_reso(result, self._reso) f.__name__ = name return f @@ -1443,91 +1457,96 @@ class Timedelta(_Timedelta): def __new__(cls, object value=_no_input, unit=None, **kwargs): cdef _Timedelta td_base - if value is _no_input: - if not len(kwargs): - raise ValueError("cannot construct a Timedelta without a " - "value/unit or descriptive keywords " - "(days,seconds....)") + try: + if value is _no_input: + if not len(kwargs): + raise ValueError("cannot construct a Timedelta without a " + "value/unit or descriptive keywords " + "(days,seconds....)") + + kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} + + unsupported_kwargs = set(kwargs) + unsupported_kwargs.difference_update(cls._req_any_kwargs_new) + if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs): + raise ValueError( + "cannot construct a Timedelta from the passed arguments, " + "allowed keywords are " + "[weeks, days, hours, minutes, seconds, " + "milliseconds, microseconds, nanoseconds]" + ) + + # GH43764, convert any input to nanoseconds first and then + # create the timestamp. 
This ensures that any potential + # nanosecond contributions from kwargs parsed as floats + # are taken into consideration. + seconds = int(( + ( + (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 + + kwargs.get('hours', 0) + ) * 3600 + + kwargs.get('minutes', 0) * 60 + + kwargs.get('seconds', 0) + ) * 1_000_000_000 + ) - kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} + value = np.timedelta64( + int(kwargs.get('nanoseconds', 0)) + + int(kwargs.get('microseconds', 0) * 1_000) + + int(kwargs.get('milliseconds', 0) * 1_000_000) + + seconds + ) - unsupported_kwargs = set(kwargs) - unsupported_kwargs.difference_update(cls._req_any_kwargs_new) - if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs): + if unit in {'Y', 'y', 'M'}: raise ValueError( - "cannot construct a Timedelta from the passed arguments, " - "allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]" + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." ) - # GH43764, convert any input to nanoseconds first and then - # create the timestamp. This ensures that any potential - # nanosecond contributions from kwargs parsed as floats - # are taken into consideration. - seconds = int(( - ( - (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 - + kwargs.get('hours', 0) - ) * 3600 - + kwargs.get('minutes', 0) * 60 - + kwargs.get('seconds', 0) - ) * 1_000_000_000 - ) - - value = np.timedelta64( - int(kwargs.get('nanoseconds', 0)) - + int(kwargs.get('microseconds', 0) * 1_000) - + int(kwargs.get('milliseconds', 0) * 1_000_000) - + seconds - ) - - if unit in {'Y', 'y', 'M'}: - raise ValueError( - "Units 'M', 'Y', and 'y' are no longer supported, as they do not " - "represent unambiguous timedelta values durations." 
- ) - - # GH 30543 if pd.Timedelta already passed, return it - # check that only value is passed - if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: - return value - elif isinstance(value, _Timedelta): - value = value.value - elif isinstance(value, str): - if unit is not None: - raise ValueError("unit must not be specified if the value is a str") - if (len(value) > 0 and value[0] == 'P') or ( - len(value) > 1 and value[:2] == '-P' - ): - value = parse_iso_format_string(value) + # GH 30543 if pd.Timedelta already passed, return it + # check that only value is passed + if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: + return value + elif isinstance(value, _Timedelta): + value = value.value + elif isinstance(value, str): + if unit is not None: + raise ValueError("unit must not be specified if the value is a str") + if (len(value) > 0 and value[0] == 'P') or ( + len(value) > 1 and value[:2] == '-P' + ): + value = parse_iso_format_string(value) + else: + value = parse_timedelta_string(value) + value = np.timedelta64(value) + elif PyDelta_Check(value): + value = convert_to_timedelta64(value, 'ns') + elif is_timedelta64_object(value): + value = ensure_td64ns(value) + elif is_tick_object(value): + value = np.timedelta64(value.nanos, 'ns') + elif is_integer_object(value) or is_float_object(value): + # unit=None is de-facto 'ns' + unit = parse_timedelta_unit(unit) + value = convert_to_timedelta64(value, unit) + elif checknull_with_nat(value): + return NaT else: - value = parse_timedelta_string(value) - value = np.timedelta64(value) - elif PyDelta_Check(value): - value = convert_to_timedelta64(value, 'ns') - elif is_timedelta64_object(value): - value = ensure_td64ns(value) - elif is_tick_object(value): - value = np.timedelta64(value.nanos, 'ns') - elif is_integer_object(value) or is_float_object(value): - # unit=None is de-facto 'ns' - unit = parse_timedelta_unit(unit) - value = convert_to_timedelta64(value, unit) - elif 
checknull_with_nat(value): - return NaT - else: - raise ValueError( - "Value must be Timedelta, string, integer, " - f"float, timedelta or convertible, not {type(value).__name__}" - ) + raise ValueError( + "Value must be Timedelta, string, integer, " + f"float, timedelta or convertible, not {type(value).__name__}" + ) - if is_timedelta64_object(value): - value = value.view('i8') + if is_timedelta64_object(value): + value = value.view('i8') - # nat - if value == NPY_NAT: - return NaT + # nat + if value == NPY_NAT: + return NaT + + except OverflowError as ex: + msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from ex return _timedelta_from_value_and_reso(value, NPY_FR_ns) @@ -1824,6 +1843,6 @@ cdef _broadcast_floordiv_td64( # resolution in ns -Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) -Timedelta.max = Timedelta(np.iinfo(np.int64).max) +Timedelta.min = Timedelta(TIMEDELTA_MIN_NS) +Timedelta.max = Timedelta(TIMEDELTA_MAX_NS) Timedelta.resolution = Timedelta(nanoseconds=1) diff --git a/pandas/tests/scalar/timedelta/conftest.py b/pandas/tests/scalar/timedelta/conftest.py new file mode 100644 index 0000000000000..cdc792b79b669 --- /dev/null +++ b/pandas/tests/scalar/timedelta/conftest.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import re + +import pytest + +from pandas._libs.tslibs import OutOfBoundsTimedelta + + +@pytest.fixture() +def timedelta_overflow() -> dict: + """ + The expected message and exception when Timedelta ops overflow. 
+ """ + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) + return {"expected_exception": OutOfBoundsTimedelta, "match": msg} diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 74aa7f045088e..ba7b641e08b21 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -10,8 +10,6 @@ import numpy as np import pytest -from pandas.errors import OutOfBoundsTimedelta - import pandas as pd from pandas import ( NaT, @@ -98,12 +96,11 @@ def test_td_add_datetimelike_scalar(self, op): result = op(td, NaT) assert result is NaT - def test_td_add_timestamp_overflow(self): - msg = "int too (large|big) to convert" - with pytest.raises(OverflowError, match=msg): + def test_td_add_timestamp_overflow(self, timedelta_overflow): + with pytest.raises(**timedelta_overflow): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - with pytest.raises(OutOfBoundsTimedelta, match=msg): + with pytest.raises(**timedelta_overflow): Timestamp("1700-01-01") + timedelta(days=13 * 19999) @pytest.mark.parametrize("op", [operator.add, ops.radd]) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 59608c736ee7a..a89cd3f768432 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._libs.tslibs import OutOfBoundsTimedelta - from pandas import ( NA, NaT, @@ -165,7 +163,7 @@ def test_construction(): @pytest.mark.parametrize("unit", ("ps", "ns")) -def test_from_np_td64_ignores_unit(unit: str): +def test_from_np_td64_ignores_unit(unit: str, timedelta_overflow): """ Ignore the unit, as it may cause silently overflows leading to incorrect results, and in non-overflow cases is irrelevant GH#46827. 
@@ -174,7 +172,7 @@ def test_from_np_td64_ignores_unit(unit: str): assert Timedelta(td64, unit=unit) == Timedelta(td64) - with pytest.raises(OutOfBoundsTimedelta, match=f"{td64 * 2}"): + with pytest.raises(**timedelta_overflow): Timedelta(td64 * 2, unit=unit) @@ -449,18 +447,14 @@ def test_raises_if_ambiguous_units_passed(self, unit: str): class TestOverflow: - - msg = "|".join( - ( - "Python int too large to convert to C long", - "int too big to convert", - *TD_KWARGS_UNITS.keys(), - ) - ) - errors = (OverflowError, OutOfBoundsTimedelta) - @pytest.mark.parametrize(("unit", "max_val"), TD_MAX_PER_UNIT.items()) - def test_int_plus_units_too_big(self, unit: str, max_val: int, request): + def test_int_plus_units_too_big( + self, + unit: str, + max_val: int, + request, + timedelta_overflow, + ): if unit == "w": mark = pytest.mark.xfail( reason="does not raise", @@ -471,11 +465,17 @@ def test_int_plus_units_too_big(self, unit: str, max_val: int, request): too_big = max_val + 1 - with pytest.raises(self.errors, match=self.msg): + with pytest.raises(**timedelta_overflow): Timedelta(too_big, unit=unit) @pytest.mark.parametrize(("unit", "min_val"), skip_ns(TD_MIN_PER_UNIT).items()) - def test_int_plus_units_too_small(self, unit: str, min_val: int, request): + def test_int_plus_units_too_small( + self, + unit: str, + min_val: int, + request, + timedelta_overflow, + ): if unit == "w": mark = pytest.mark.xfail( reason="does not raise", @@ -486,49 +486,54 @@ def test_int_plus_units_too_small(self, unit: str, min_val: int, request): too_small = min_val - 1 - with pytest.raises(self.errors, match=self.msg): + with pytest.raises(**timedelta_overflow): Timedelta(too_small, unit=unit) @pytest.mark.parametrize(("kwarg", "max_val"), TD_MAX_PER_KWARG.items()) - def test_kwarg_too_big(self, kwarg: str, max_val: int): + def test_kwarg_too_big(self, kwarg: str, max_val: int, timedelta_overflow): too_big = max_val + 1 - with pytest.raises(self.errors, match=self.msg): + with 
pytest.raises(**timedelta_overflow): assert Timedelta(**{kwarg: too_big}) # type: ignore[arg-type] @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) - def test_kwarg_too_small(self, kwarg: str, min_val: int): + def test_kwarg_too_small(self, kwarg: str, min_val: int, timedelta_overflow): too_small = min_val - 1 - with pytest.raises(self.errors, match=self.msg): + with pytest.raises(**timedelta_overflow): Timedelta(**{kwarg: too_small}) # type: ignore[arg-type] @pytest.mark.parametrize(("kwarg", "max_val"), skip_ns(TD_MAX_PER_KWARG).items()) - def test_from_timedelta_too_big(self, kwarg: str, max_val: int): + def test_from_timedelta_too_big(self, kwarg: str, max_val: int, timedelta_overflow): too_big = timedelta(**{kwarg: max_val + 1}) - with pytest.raises(self.errors, match=self.msg): + with pytest.raises(**timedelta_overflow): Timedelta(too_big) @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) - def test_from_timedelta_too_small(self, kwarg: str, min_val: int): + def test_from_timedelta_too_small( + self, + kwarg: str, + min_val: int, + timedelta_overflow, + ): too_small = timedelta(**{kwarg: min_val - 1}) - with pytest.raises(self.errors, match=self.msg): + with pytest.raises(**timedelta_overflow): Timedelta(too_small) @pytest.mark.parametrize(("unit", "max_val"), skip_ns(NP_TD64_MAX_PER_UNIT).items()) - def test_from_np_td64_too_big(self, unit: str, max_val: int): + def test_from_np_td64_too_big(self, unit: str, max_val: int, timedelta_overflow): too_big = np.timedelta64(max_val + 1, unit) - with pytest.raises(self.errors, match=self.msg): + with pytest.raises(**timedelta_overflow): Timedelta(too_big) @pytest.mark.parametrize(("unit", "min_val"), skip_ns(NP_TD64_MIN_PER_UNIT).items()) - def test_from_np_td64_too_small(self, unit: str, min_val: int): + def test_from_np_td64_too_small(self, unit: str, min_val: int, timedelta_overflow): too_small = np.timedelta64(min_val - 1, unit) - with 
pytest.raises(self.errors, match=self.msg): + with pytest.raises(**timedelta_overflow): Timedelta(too_small) def test_too_small_by_1ns_returns_nat(self): diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index cf7211e82b799..f250c479826be 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -646,7 +646,7 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) - def test_implementation_limits(self): + def test_implementation_limits(self, timedelta_overflow): min_td = Timedelta(Timedelta.min) max_td = Timedelta(Timedelta.max) @@ -658,21 +658,20 @@ def test_implementation_limits(self): # Beyond lower limit, a NAT before the Overflow assert (min_td - Timedelta(1, "ns")) is NaT - msg = "int too (large|big) to convert" - with pytest.raises(OverflowError, match=msg): + with pytest.raises(**timedelta_overflow): min_td - Timedelta(2, "ns") - with pytest.raises(OverflowError, match=msg): + with pytest.raises(**timedelta_overflow): max_td + Timedelta(1, "ns") # Same tests using the internal nanosecond values td = Timedelta(min_td.value - 1, "ns") assert td is NaT - with pytest.raises(OverflowError, match=msg): + with pytest.raises(**timedelta_overflow): Timedelta(min_td.value - 2, "ns") - with pytest.raises(OverflowError, match=msg): + with pytest.raises(**timedelta_overflow): Timedelta(max_td.value + 1, "ns") def test_total_seconds_precision(self): From 51eb1513c7a2260b67685439059403dd8eacc47f Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 4 May 2022 12:52:18 -0700 Subject: [PATCH 04/18] catch correct error in Timedelta sub --- pandas/_libs/tslibs/timedeltas.pyi | 1 - pandas/_libs/tslibs/timedeltas.pyx | 18 ++++++++++++++---- pandas/_libs/tslibs/timestamps.pyx | 17 ++++++++--------- .../tests/scalar/{timedelta => }/conftest.py | 0 .../tests/scalar/timestamp/test_arithmetic.py 
| 15 ++++++++++----- 5 files changed, 32 insertions(+), 19 deletions(-) rename pandas/tests/scalar/{timedelta => }/conftest.py (100%) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 6dc9de817ad9a..a04104915cf1f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -73,7 +73,6 @@ def array_to_timedelta64( ) -> np.ndarray: ... # np.ndarray[m8ns] def parse_timedelta_unit(unit: str | None) -> UnitChoices: ... def delta_to_nanoseconds(delta: np.timedelta64 | timedelta | Tick) -> int: ... -def calculate(op, left: int, right: int) -> int: ... class Timedelta(timedelta): min: ClassVar[Timedelta] diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 5f227fb02eedb..a8fa138bdb1ff 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -684,13 +684,23 @@ def _op_unary_method(func, name): return f -cpdef int64_t calculate(object op, int64_t a, int64_t b) except? -1: +@cython.overflowcheck(True) +cpdef int64_t _calculate(object op, int64_t a, int64_t b) except? -1: + """ + Calculate op(a, b) and return the result. Raises OverflowError if either operand + or the result would overflow on conversion to int64_t. + """ + return op(a, b) + + +cpdef int64_t calculate(object op, object a, object b) except? -1: """ - Calculate op(a, b) and return the result, or raise if the operation would overflow. + As above, but raises an OutOfBoundsTimedelta. 
""" + cdef int64_t int_a, int_b + try: - with cython.overflowcheck(True): - return op(a, b) + return _calculate(op, a, b) except OverflowError as ex: msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" raise OutOfBoundsTimedelta(msg) from ex diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index db951027e5794..02d2f6e7ebb58 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -86,7 +86,7 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, ) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime, OutOfBoundsTimedelta from pandas._libs.tslibs.offsets cimport ( BaseOffset, @@ -431,14 +431,13 @@ cdef class _Timestamp(ABCTimestamp): # Timedelta try: return Timedelta(self.value - other.value) - except (OverflowError, OutOfBoundsDatetime) as err: - if isinstance(other, _Timestamp): - if both_timestamps: - raise OutOfBoundsDatetime( - "Result is too large for pandas.Timedelta. Convert inputs " - "to datetime.datetime with 'Timestamp.to_pydatetime()' " - "before subtracting." - ) from err + except OutOfBoundsTimedelta as err: + if both_timestamps: + raise OutOfBoundsTimedelta( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." 
+ ) from err # We get here in stata tests, fall back to stdlib datetime # method and return stdlib timedelta object pass diff --git a/pandas/tests/scalar/timedelta/conftest.py b/pandas/tests/scalar/conftest.py similarity index 100% rename from pandas/tests/scalar/timedelta/conftest.py rename to pandas/tests/scalar/conftest.py diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index b46962fb82896..bda0d934227b5 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -72,17 +72,22 @@ def test_overflow_offset_raises(self): with pytest.raises(OverflowError, match=lmsg): stamp - offset_overflow - def test_overflow_timestamp_raises(self): + def test_sub_can_return_stdlib_timedelta_to_avoid_overflow(self, timedelta_overflow): # https://github.com/pandas-dev/pandas/issues/31774 - msg = "Result is too large" + msg = "Result is too large for pandas.Timedelta" a = Timestamp("2101-01-01 00:00:00") b = Timestamp("1688-01-01 00:00:00") - with pytest.raises(OutOfBoundsDatetime, match=msg): + with pytest.raises(timedelta_overflow["expected_exception"], match=msg): a - b - # but we're OK for timestamp and datetime.datetime - assert (a - b.to_pydatetime()) == (a.to_pydatetime() - b) + # but we're OK for Timestamp and datetime.datetime + r0 = a - b.to_pydatetime() + r1 = a.to_pydatetime() - b + assert r0 == r1 + assert isinstance(r0, timedelta) + assert isinstance(r1, timedelta) + def test_delta_preserve_nanos(self): val = Timestamp(1337299200000000123) From 36d37dcc15bb59dbb3294ae9f8c90c8cfccc7c5e Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 4 May 2022 15:52:28 -0700 Subject: [PATCH 05/18] extract overflow-checking wrapper fcn, add tests --- pandas/_libs/ops.pxd | 4 +++ pandas/_libs/ops.pyi | 1 + pandas/_libs/ops.pyx | 10 ++++++ pandas/_libs/tslibs/timedeltas.pyx | 17 +++------- pandas/tests/libs/test_ops.py | 53 ++++++++++++++++++++++++++++++ 5 
files changed, 72 insertions(+), 13 deletions(-) create mode 100644 pandas/_libs/ops.pxd create mode 100644 pandas/tests/libs/test_ops.py diff --git a/pandas/_libs/ops.pxd b/pandas/_libs/ops.pxd new file mode 100644 index 0000000000000..d63c1169a0c46 --- /dev/null +++ b/pandas/_libs/ops.pxd @@ -0,0 +1,4 @@ +from numpy cimport int64_t + + +cpdef int64_t calculate(object op, int64_t a, int64_t b) except? -1 diff --git a/pandas/_libs/ops.pyi b/pandas/_libs/ops.pyi index 74a6ad87cd279..2ed29a230c955 100644 --- a/pandas/_libs/ops.pyi +++ b/pandas/_libs/ops.pyi @@ -48,3 +48,4 @@ def maybe_convert_bool( *, convert_to_masked_nullable: Literal[True], ) -> tuple[np.ndarray, np.ndarray]: ... +def calculate(op, left: int, right: int) -> int: ... diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 308756e378dde..cf3f582e767e3 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -16,6 +16,7 @@ import numpy as np from numpy cimport ( import_array, + int64_t, ndarray, uint8_t, ) @@ -308,3 +309,12 @@ def maybe_convert_bool(ndarray[object] arr, return (arr, None) else: return (result.view(np.bool_), None) + + +@cython.overflowcheck(True) +cpdef int64_t calculate(object op, int64_t a, int64_t b) except? -1: + """ + Calculate op(a, b) and return the result. Raises OverflowError if converting either + operand or the result to an int64_t would overflow. 
+ """ + return op(a, b) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a8fa138bdb1ff..eff53b25b4ad4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -29,6 +29,7 @@ from cpython.datetime cimport ( import_datetime() +from pandas._libs cimport ops cimport pandas._libs.tslibs.util as util from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( @@ -684,23 +685,13 @@ def _op_unary_method(func, name): return f -@cython.overflowcheck(True) -cpdef int64_t _calculate(object op, int64_t a, int64_t b) except? -1: - """ - Calculate op(a, b) and return the result. Raises OverflowError if either operand - or the result would overflow on conversion to int64_t. - """ - return op(a, b) - - cpdef int64_t calculate(object op, object a, object b) except? -1: """ - As above, but raises an OutOfBoundsTimedelta. + Calculate op(a, b), raising if either operand or the resulting value cannot be + safely cast to an int64_t. 
""" - cdef int64_t int_a, int_b - try: - return _calculate(op, a, b) + return ops.calculate(op, a, b) except OverflowError as ex: msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" raise OutOfBoundsTimedelta(msg) from ex diff --git a/pandas/tests/libs/test_ops.py b/pandas/tests/libs/test_ops.py new file mode 100644 index 0000000000000..50c2b9db86750 --- /dev/null +++ b/pandas/tests/libs/test_ops.py @@ -0,0 +1,53 @@ +import operator + +import numpy as np +import pytest + +from pandas._libs import ops + + +@pytest.fixture(name="int_max") +def fixture_int_max() -> int: + return np.iinfo(np.int64).max + + +@pytest.fixture(name="int_min") +def fixture_int_min() -> int: + return np.iinfo(np.int64).min + + +@pytest.fixture(name="overflow_msg") +def fixture_overflow_msg() -> str: + return "Python int too large to convert to C long" + + +def test_raises_for_too_large_arg(int_max: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.add, int_max + 1, 1) + + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.add, 1, int_max + 1) + + +def test_raises_for_too_small_arg(int_min: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.add, int_min - 1, 1) + + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.add, 1, int_min - 1) + + +def test_raises_for_too_large_result(int_max: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.add, int_max, 1) + + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.add, 1, int_max) + + +def test_raises_for_too_small_result(int_min: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.sub, int_min, 1) + + with pytest.raises(OverflowError, match=overflow_msg): + ops.calculate(operator.sub, 1, int_min) From 
d53d1ce0a606dddc2161c07c9adf6c32cf098485 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 4 May 2022 17:21:02 -0700 Subject: [PATCH 06/18] simplify Timedelta ctor, fix tests --- pandas/_libs/tslibs/timedeltas.pyx | 133 +++++++----------- .../scalar/timedelta/test_constructors.py | 4 +- .../tests/scalar/timestamp/test_arithmetic.py | 3 +- pandas/tests/tools/test_to_datetime.py | 8 +- 4 files changed, 53 insertions(+), 95 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index eff53b25b4ad4..72b990b4d73e5 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -42,6 +42,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, checknull_with_nat, + is_td64nat, ) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, @@ -1452,104 +1453,68 @@ class Timedelta(_Timedelta): We see that either way we get the same result """ - _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds", - "milliseconds", "microseconds", "nanoseconds"} + _allowed_kwargs = ( + "weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds" + ) def __new__(cls, object value=_no_input, unit=None, **kwargs): cdef _Timedelta td_base - try: - if value is _no_input: - if not len(kwargs): - raise ValueError("cannot construct a Timedelta without a " - "value/unit or descriptive keywords " - "(days,seconds....)") - - kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs} - - unsupported_kwargs = set(kwargs) - unsupported_kwargs.difference_update(cls._req_any_kwargs_new) - if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs): - raise ValueError( - "cannot construct a Timedelta from the passed arguments, " - "allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]" - ) - - # GH43764, convert any input to nanoseconds first and then - # create the 
timestamp. This ensures that any potential - # nanosecond contributions from kwargs parsed as floats - # are taken into consideration. - seconds = int(( - ( - (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 - + kwargs.get('hours', 0) - ) * 3600 - + kwargs.get('minutes', 0) * 60 - + kwargs.get('seconds', 0) - ) * 1_000_000_000 - ) + if isinstance(value, _Timedelta): + return value + if checknull_with_nat(value): + return NaT - value = np.timedelta64( - int(kwargs.get('nanoseconds', 0)) - + int(kwargs.get('microseconds', 0) * 1_000) - + int(kwargs.get('milliseconds', 0) * 1_000_000) - + seconds + if unit in {"Y", "y", "M"}: + raise ValueError( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." + ) + if isinstance(value, str) and unit is not None: + raise ValueError("unit must not be specified if the value is a str") + elif value is _no_input: + if not kwargs: + raise ValueError( + "cannot construct a Timedelta without a value/unit " + "or descriptive keywords (days,seconds....)" ) - - if unit in {'Y', 'y', 'M'}: + if not kwargs.keys() <= set(cls._allowed_kwargs): raise ValueError( - "Units 'M', 'Y', and 'y' are no longer supported, as they do not " - "represent unambiguous timedelta values durations." 
+ "cannot construct a Timedelta from the passed arguments, " + f"allowed keywords are {cls._allowed_kwargs}" ) - # GH 30543 if pd.Timedelta already passed, return it - # check that only value is passed - if isinstance(value, _Timedelta) and unit is None and len(kwargs) == 0: - return value - elif isinstance(value, _Timedelta): - value = value.value - elif isinstance(value, str): - if unit is not None: - raise ValueError("unit must not be specified if the value is a str") - if (len(value) > 0 and value[0] == 'P') or ( - len(value) > 1 and value[:2] == '-P' - ): - value = parse_iso_format_string(value) + try: + # GH43764, convert any input to nanoseconds first, to ensure any potential + # nanosecond contributions from kwargs parsed as floats are included + kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) + if kwargs: + value = convert_to_timedelta64( + sum(( + kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000, + kwargs["days"] * 24 * 3600 * 1_000_000_000, + kwargs["hours"] * 3600 * 1_000_000_000, + kwargs["minutes"] * 60 * 1_000_000_000, + kwargs["seconds"] * 1_000_000_000, + kwargs["milliseconds"] * 1_000_000, + kwargs["microseconds"] * 1_000, + kwargs["nanoseconds"], + )), + "ns", + ) + else: + if is_integer_object(value) or is_float_object(value): + unit = parse_timedelta_unit(unit) else: - value = parse_timedelta_string(value) - value = np.timedelta64(value) - elif PyDelta_Check(value): - value = convert_to_timedelta64(value, 'ns') - elif is_timedelta64_object(value): - value = ensure_td64ns(value) - elif is_tick_object(value): - value = np.timedelta64(value.nanos, 'ns') - elif is_integer_object(value) or is_float_object(value): - # unit=None is de-facto 'ns' - unit = parse_timedelta_unit(unit) + unit = "ns" value = convert_to_timedelta64(value, unit) - elif checknull_with_nat(value): - return NaT - else: - raise ValueError( - "Value must be Timedelta, string, integer, " - f"float, timedelta or convertible, not 
{type(value).__name__}" - ) - - if is_timedelta64_object(value): - value = value.view('i8') - - # nat - if value == NPY_NAT: - return NaT - except OverflowError as ex: msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" raise OutOfBoundsTimedelta(msg) from ex - - return _timedelta_from_value_and_reso(value, NPY_FR_ns) + else: + if is_td64nat(value): + return NaT + return _timedelta_from_value_and_reso(value.view("i8"), NPY_FR_ns) def __setstate__(self, state): if len(state) == 1: diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index a89cd3f768432..cf2ef6975b88a 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -343,8 +343,8 @@ def test_raises_if_no_args_passed(self): def test_raises_for_invalid_kwarg(self, unit: str): msg = re.escape( "cannot construct a Timedelta from the passed arguments, allowed keywords " - "are [weeks, days, hours, minutes, seconds, milliseconds, " - "microseconds, nanoseconds]" + "are ('weeks', 'days', 'hours', 'minutes', 'seconds', 'milliseconds', " + "'microseconds', 'nanoseconds')" ) with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index bda0d934227b5..0c04427c52537 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -72,7 +72,7 @@ def test_overflow_offset_raises(self): with pytest.raises(OverflowError, match=lmsg): stamp - offset_overflow - def test_sub_can_return_stdlib_timedelta_to_avoid_overflow(self, timedelta_overflow): + def test_sub_returns_stdlib_timedelta_to_avoid_overflow(self, timedelta_overflow): # https://github.com/pandas-dev/pandas/issues/31774 msg = "Result is too large for pandas.Timedelta" a = Timestamp("2101-01-01 00:00:00") @@ -88,7 +88,6 @@ def 
test_sub_can_return_stdlib_timedelta_to_avoid_overflow(self, timedelta_overf assert isinstance(r0, timedelta) assert isinstance(r1, timedelta) - def test_delta_preserve_nanos(self): val = Timestamp(1337299200000000123) result = val + timedelta(1) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7597d4345cfce..c04cd8071e477 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1838,14 +1838,8 @@ def test_to_datetime_list_of_integers(self): def test_to_datetime_overflow(self): # gh-17637 # we are overflowing Timedelta range here + msg = "outside allowed range" - msg = "|".join( - [ - "Python int too large to convert to C long", - "long too big to convert", - "int too big to convert", - ] - ) with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) From d995243102d3157728ef0d2e76f0670a7f97e317 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 4 May 2022 20:48:29 -0700 Subject: [PATCH 07/18] test, linter fixes --- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 5 +- pandas/tests/libs/test_ops.py | 7 +- pandas/tests/scalar/conftest.py | 18 ---- .../tests/scalar/timedelta/test_arithmetic.py | 12 ++- .../scalar/timedelta/test_constructors.py | 82 +++++++++++-------- .../tests/scalar/timedelta/test_timedelta.py | 15 ++-- .../tests/scalar/timestamp/test_arithmetic.py | 6 +- pandas/tests/tools/test_to_timedelta.py | 9 +- pandas/tests/tslibs/test_timedeltas.py | 15 ---- 10 files changed, 82 insertions(+), 89 deletions(-) delete mode 100644 pandas/tests/scalar/conftest.py diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 72b990b4d73e5..f1b2435a13111 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -29,8 +29,8 @@ from cpython.datetime cimport ( import_datetime() -from pandas._libs cimport ops cimport 
pandas._libs.tslibs.util as util +from pandas._libs cimport ops from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( cast_from_unit, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 02d2f6e7ebb58..5b9fbae19e543 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -86,7 +86,10 @@ from pandas._libs.tslibs.np_datetime cimport ( pydatetime_to_dt64, ) -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime, OutOfBoundsTimedelta +from pandas._libs.tslibs.np_datetime import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) from pandas._libs.tslibs.offsets cimport ( BaseOffset, diff --git a/pandas/tests/libs/test_ops.py b/pandas/tests/libs/test_ops.py index 50c2b9db86750..7e152a928ae63 100644 --- a/pandas/tests/libs/test_ops.py +++ b/pandas/tests/libs/test_ops.py @@ -18,7 +18,12 @@ def fixture_int_min() -> int: @pytest.fixture(name="overflow_msg") def fixture_overflow_msg() -> str: - return "Python int too large to convert to C long" + return "|".join( + ( + "Python int too large to convert to C long", + "int too big to convert", + ) + ) def test_raises_for_too_large_arg(int_max: int, overflow_msg: str): diff --git a/pandas/tests/scalar/conftest.py b/pandas/tests/scalar/conftest.py deleted file mode 100644 index cdc792b79b669..0000000000000 --- a/pandas/tests/scalar/conftest.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import annotations - -import re - -import pytest - -from pandas._libs.tslibs import OutOfBoundsTimedelta - - -@pytest.fixture() -def timedelta_overflow() -> dict: - """ - The expected message and exception when Timedelta ops overflow. 
- """ - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - return {"expected_exception": OutOfBoundsTimedelta, "match": msg} diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index ba7b641e08b21..77dbc133deb3b 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -6,10 +6,13 @@ timedelta, ) import operator +import re import numpy as np import pytest +from pandas._libs.tslibs import OutOfBoundsTimedelta + import pandas as pd from pandas import ( NaT, @@ -96,11 +99,14 @@ def test_td_add_datetimelike_scalar(self, op): result = op(td, NaT) assert result is NaT - def test_td_add_timestamp_overflow(self, timedelta_overflow): - with pytest.raises(**timedelta_overflow): + def test_td_add_timestamp_overflow(self): + msg = msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + timedelta(days=13 * 19999) @pytest.mark.parametrize("op", [operator.add, ops.radd]) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index cf2ef6975b88a..38e1e294efe69 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._libs.tslibs import OutOfBoundsTimedelta + from pandas import ( NA, NaT, @@ -163,16 +165,19 @@ def test_construction(): @pytest.mark.parametrize("unit", ("ps", "ns")) -def test_from_np_td64_ignores_unit(unit: str, timedelta_overflow): +def test_from_np_td64_ignores_unit(unit: str): """ Ignore the unit, as it may cause silently overflows leading to 
incorrect results, and in non-overflow cases is irrelevant GH#46827. """ td64 = np.timedelta64(NP_TD64_MAX_PER_UNIT["h"], "h") + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) assert Timedelta(td64, unit=unit) == Timedelta(td64) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(td64 * 2, unit=unit) @@ -448,13 +453,7 @@ def test_raises_if_ambiguous_units_passed(self, unit: str): class TestOverflow: @pytest.mark.parametrize(("unit", "max_val"), TD_MAX_PER_UNIT.items()) - def test_int_plus_units_too_big( - self, - unit: str, - max_val: int, - request, - timedelta_overflow, - ): + def test_int_plus_units_too_big(self, unit: str, max_val: int, request): if unit == "w": mark = pytest.mark.xfail( reason="does not raise", @@ -464,18 +463,15 @@ def test_int_plus_units_too_big( request.node.add_marker(mark) too_big = max_val + 1 + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(too_big, unit=unit) @pytest.mark.parametrize(("unit", "min_val"), skip_ns(TD_MIN_PER_UNIT).items()) - def test_int_plus_units_too_small( - self, - unit: str, - min_val: int, - request, - timedelta_overflow, - ): + def test_int_plus_units_too_small(self, unit: str, min_val: int, request): if unit == "w": mark = pytest.mark.xfail( reason="does not raise", @@ -485,55 +481,71 @@ def test_int_plus_units_too_small( request.node.add_marker(mark) too_small = min_val - 1 + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(too_small, unit=unit) @pytest.mark.parametrize(("kwarg", "max_val"), TD_MAX_PER_KWARG.items()) - def test_kwarg_too_big(self, kwarg: str, max_val: int, timedelta_overflow): + def 
test_kwarg_too_big(self, kwarg: str, max_val: int): too_big = max_val + 1 + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): assert Timedelta(**{kwarg: too_big}) # type: ignore[arg-type] @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) - def test_kwarg_too_small(self, kwarg: str, min_val: int, timedelta_overflow): + def test_kwarg_too_small(self, kwarg: str, min_val: int): too_small = min_val - 1 + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(**{kwarg: too_small}) # type: ignore[arg-type] @pytest.mark.parametrize(("kwarg", "max_val"), skip_ns(TD_MAX_PER_KWARG).items()) - def test_from_timedelta_too_big(self, kwarg: str, max_val: int, timedelta_overflow): + def test_from_timedelta_too_big(self, kwarg: str, max_val: int): too_big = timedelta(**{kwarg: max_val + 1}) + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(too_big) @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) - def test_from_timedelta_too_small( - self, - kwarg: str, - min_val: int, - timedelta_overflow, - ): + def test_from_timedelta_too_small(self, kwarg: str, min_val: int): too_small = timedelta(**{kwarg: min_val - 1}) + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(too_small) @pytest.mark.parametrize(("unit", "max_val"), skip_ns(NP_TD64_MAX_PER_UNIT).items()) - def test_from_np_td64_too_big(self, unit: str, max_val: int, 
timedelta_overflow): + def test_from_np_td64_too_big(self, unit: str, max_val: int): too_big = np.timedelta64(max_val + 1, unit) + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(too_big) @pytest.mark.parametrize(("unit", "min_val"), skip_ns(NP_TD64_MIN_PER_UNIT).items()) - def test_from_np_td64_too_small(self, unit: str, min_val: int, timedelta_overflow): + def test_from_np_td64_too_small(self, unit: str, min_val: int): too_small = np.timedelta64(min_val - 1, unit) + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(too_small) def test_too_small_by_1ns_returns_nat(self): diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index f250c479826be..b1652ef6515af 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -1,5 +1,6 @@ """ test the scalar Timedelta """ from datetime import timedelta +import re from hypothesis import ( given, @@ -11,6 +12,7 @@ from pandas._libs import lib from pandas._libs.tslibs import ( NaT, + OutOfBoundsTimedelta, iNaT, ) @@ -646,7 +648,7 @@ def test_timedelta_hash_equality(self): ns_td = Timedelta(1, "ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) - def test_implementation_limits(self, timedelta_overflow): + def test_implementation_limits(self): min_td = Timedelta(Timedelta.min) max_td = Timedelta(Timedelta.max) @@ -658,20 +660,23 @@ def test_implementation_limits(self, timedelta_overflow): # Beyond lower limit, a NAT before the Overflow assert (min_td - Timedelta(1, "ns")) is NaT - with pytest.raises(**timedelta_overflow): + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 
9223372036854775807ns]" + ) + with pytest.raises(OutOfBoundsTimedelta, match=msg): min_td - Timedelta(2, "ns") - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): max_td + Timedelta(1, "ns") # Same tests using the internal nanosecond values td = Timedelta(min_td.value - 1, "ns") assert td is NaT - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(min_td.value - 2, "ns") - with pytest.raises(**timedelta_overflow): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(max_td.value + 1, "ns") def test_total_seconds_precision(self): diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 0c04427c52537..be2d7a024154a 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -8,7 +8,7 @@ import pytest from pandas._libs.tslibs import ( - OutOfBoundsDatetime, + OutOfBoundsTimedelta, Timedelta, Timestamp, offsets, @@ -72,13 +72,13 @@ def test_overflow_offset_raises(self): with pytest.raises(OverflowError, match=lmsg): stamp - offset_overflow - def test_sub_returns_stdlib_timedelta_to_avoid_overflow(self, timedelta_overflow): + def test_sub_returns_stdlib_timedelta_to_avoid_overflow(self): # https://github.com/pandas-dev/pandas/issues/31774 msg = "Result is too large for pandas.Timedelta" a = Timestamp("2101-01-01 00:00:00") b = Timestamp("1688-01-01 00:00:00") - with pytest.raises(timedelta_overflow["expected_exception"], match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): a - b # but we're OK for Timestamp and datetime.datetime diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 6c11ec42858c0..0825fd1fb7778 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -109,9 +109,7 @@ def test_to_timedelta_invalid_unit(self, arg): def 
test_to_timedelta_time(self): # time not supported ATM - msg = ( - "Value must be Timedelta, string, integer, float, timedelta or convertible" - ) + msg = "Invalid type for timedelta scalar: " with pytest.raises(ValueError, match=msg): to_timedelta(time(second=1)) assert to_timedelta(time(second=1), errors="coerce") is pd.NaT @@ -264,10 +262,7 @@ def test_to_timedelta_zerodim(self, fixed_now_ts): dt64 = fixed_now_ts.to_datetime64() arg = np.array(dt64) - msg = ( - "Value must be Timedelta, string, integer, float, timedelta " - "or convertible, not datetime64" - ) + msg = "Invalid type for timedelta scalar: " with pytest.raises(ValueError, match=msg): to_timedelta(arg) diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py index d9e86d53f2587..6a66eb5d950c8 100644 --- a/pandas/tests/tslibs/test_timedeltas.py +++ b/pandas/tests/tslibs/test_timedeltas.py @@ -61,21 +61,6 @@ def test_huge_nanoseconds_overflow(): assert delta_to_nanoseconds(Timedelta(nanoseconds=1e10)) == 1e10 -@pytest.mark.parametrize( - "kwargs", [{"Seconds": 1}, {"seconds": 1, "Nanoseconds": 1}, {"Foo": 2}] -) -def test_kwarg_assertion(kwargs): - err_message = ( - "cannot construct a Timedelta from the passed arguments, " - "allowed keywords are " - "[weeks, days, hours, minutes, seconds, " - "milliseconds, microseconds, nanoseconds]" - ) - - with pytest.raises(ValueError, match=re.escape(err_message)): - Timedelta(**kwargs) - - class TestArrayToTimedelta64: def test_array_to_timedelta64_string_with_unit_2d_raises(self): # check the 'unit is not None and errors != "coerce"' path From 424728666cc97410f3ab6abad96fe92a57d31e57 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Thu, 5 May 2022 14:18:08 -0700 Subject: [PATCH 08/18] add wrapper for overflow-checked int/float ops --- pandas/_libs/ops.pxd | 3 +- pandas/_libs/ops.pyi | 3 +- pandas/_libs/ops.pyx | 10 ++- pandas/tests/libs/test_ops.py | 133 ++++++++++++++++++++++++++++------ 4 files changed, 122 
insertions(+), 27 deletions(-) diff --git a/pandas/_libs/ops.pxd b/pandas/_libs/ops.pxd index d63c1169a0c46..bf634f9a56e0e 100644 --- a/pandas/_libs/ops.pxd +++ b/pandas/_libs/ops.pxd @@ -1,4 +1,5 @@ from numpy cimport int64_t -cpdef int64_t calculate(object op, int64_t a, int64_t b) except? -1 +cpdef int64_t calc_int_int(object op, int64_t a, int64_t b) except? -1 +cpdef int64_t calc_int_float(object op, int64_t a, double b) except? -1 diff --git a/pandas/_libs/ops.pyi b/pandas/_libs/ops.pyi index 2ed29a230c955..5086ed860329a 100644 --- a/pandas/_libs/ops.pyi +++ b/pandas/_libs/ops.pyi @@ -48,4 +48,5 @@ def maybe_convert_bool( *, convert_to_masked_nullable: Literal[True], ) -> tuple[np.ndarray, np.ndarray]: ... -def calculate(op, left: int, right: int) -> int: ... +def calc_int_int(op, left: int, right: int) -> int: ... +def calc_int_float(op, left: int, right: float) -> int: ... diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index cf3f582e767e3..2abdee7c2b68e 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -312,9 +312,17 @@ def maybe_convert_bool(ndarray[object] arr, @cython.overflowcheck(True) -cpdef int64_t calculate(object op, int64_t a, int64_t b) except? -1: +cpdef int64_t calc_int_int(object op, int64_t a, int64_t b) except? -1: """ Calculate op(a, b) and return the result. Raises OverflowError if converting either operand or the result to an int64_t would overflow. """ return op(a, b) + +@cython.overflowcheck(True) +cpdef int64_t calc_int_float(object op, int64_t a, double b) except? -1: + """ + Calculate op(a, b) and return the result. Raises OverflowError if converting either + operand or the result would overflow. 
+ """ + return op(a, b) diff --git a/pandas/tests/libs/test_ops.py b/pandas/tests/libs/test_ops.py index 7e152a928ae63..a334e06212800 100644 --- a/pandas/tests/libs/test_ops.py +++ b/pandas/tests/libs/test_ops.py @@ -6,17 +6,27 @@ from pandas._libs import ops -@pytest.fixture(name="int_max") +@pytest.fixture(name="int_max", scope="module") def fixture_int_max() -> int: return np.iinfo(np.int64).max -@pytest.fixture(name="int_min") +@pytest.fixture(name="int_min", scope="module") def fixture_int_min() -> int: return np.iinfo(np.int64).min -@pytest.fixture(name="overflow_msg") +@pytest.fixture(name="float_max", scope="module") +def fixture_float_max() -> int: + return np.finfo(np.float64).max + + +@pytest.fixture(name="float_min", scope="module") +def fixture_float_min() -> int: + return np.finfo(np.float64).min + + +@pytest.fixture(name="overflow_msg", scope="module") def fixture_overflow_msg() -> str: return "|".join( ( @@ -26,33 +36,108 @@ def fixture_overflow_msg() -> str: ) -def test_raises_for_too_large_arg(int_max: int, overflow_msg: str): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calculate(operator.add, int_max + 1, 1) - - with pytest.raises(OverflowError, match=overflow_msg): - ops.calculate(operator.add, 1, int_max + 1) +class TestCalcIntInt: + def test_raises_for_too_large_arg(self, int_max: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.add, int_max + 1, 1) + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.add, 1, int_max + 1) -def test_raises_for_too_small_arg(int_min: int, overflow_msg: str): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calculate(operator.add, int_min - 1, 1) + def test_raises_for_too_small_arg(self, int_min: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.add, int_min - 1, 1) - with pytest.raises(OverflowError, match=overflow_msg): - 
ops.calculate(operator.add, 1, int_min - 1) + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.add, 1, int_min - 1) + def test_raises_for_too_large_result(self, int_max: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.add, int_max, 1) -def test_raises_for_too_large_result(int_max: int, overflow_msg: str): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calculate(operator.add, int_max, 1) + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.add, 1, int_max) - with pytest.raises(OverflowError, match=overflow_msg): - ops.calculate(operator.add, 1, int_max) + def test_raises_for_too_small_result(self, int_min: int, overflow_msg: str): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.sub, int_min, 1) + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_int(operator.sub, 1, int_min) -def test_raises_for_too_small_result(int_min: int, overflow_msg: str): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calculate(operator.sub, int_min, 1) - with pytest.raises(OverflowError, match=overflow_msg): - ops.calculate(operator.sub, 1, int_min) +class TestCalcIntFloat: + @pytest.mark.parametrize( + "op,lval,rval,expected", + ( + (operator.add, 1, 1.0, 2), + (operator.sub, 2, 1.0, 1), + (operator.mul, 1, 2.0, 2), + (operator.truediv, 1, 0.5, 2), + ), + ids=("+", "-", "*", "/"), + ) + def test_arithmetic_ops(self, op, lval: int, rval: float, expected: int): + result = ops.calc_int_float(op, lval, rval) + + assert result == expected + assert isinstance(result, int) + + def test_raises_for_too_large_arg( + self, + int_max: int, + float_max: float, + overflow_msg: str, + ): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_float(operator.add, int_max + 1, 1) + + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_float(operator.add, 1, 
float_max + 1) + + def test_raises_for_too_small_arg( + self, + int_min: int, + float_min: float, + overflow_msg: str, + ): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_float(operator.add, int_min - 1, 1) + + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_float(operator.add, 1, float_min - 1) + + def test_raises_for_too_large_result( + self, + int_max: int, + float_max: float, + overflow_msg: str, + ): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_float(operator.add, int_max, 1) + + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_float(operator.add, 1, float_max) + + @pytest.mark.parametrize( + "value", + ( + pytest.param( + 1024, + marks=pytest.mark.xfail( + reason="TBD", + raises=pytest.fail.Exception, + strict=True, + ), + ), + 1024.1, + ), + ) + def test_raises_for_most_too_small_results( + self, + value: float, + int_min: int, + overflow_msg: str, + ): + with pytest.raises(OverflowError, match=overflow_msg): + ops.calc_int_float(operator.sub, int_min, value) From 33203b3e75d8b1f58e9547a99944962d3d898e7e Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Thu, 5 May 2022 17:01:40 -0700 Subject: [PATCH 09/18] wip: lots more tests, consolidate overflow checking --- pandas/_libs/tslibs/timedeltas.pyx | 85 +- .../tests/scalar/timedelta/test_arithmetic.py | 1291 ++++++++--------- .../tests/scalar/timestamp/test_arithmetic.py | 11 +- 3 files changed, 666 insertions(+), 721 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f1b2435a13111..7c9eb25e37182 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,4 +1,5 @@ import collections +import operator import warnings cimport cython @@ -215,20 +216,16 @@ cpdef int64_t delta_to_nanoseconds(delta) except? 
-1: return get_timedelta64_value(ensure_td64ns(delta)) if PyDelta_Check(delta): - try: - return ( - delta.days * 24 * 3600 * 1_000_000 - + delta.seconds * 1_000_000 - + delta.microseconds - ) * 1000 - except OverflowError as err: - msg = f"{delta} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" - raise OutOfBoundsTimedelta(msg) from err + microseconds = ( + delta.days * 24 * 3600 * 1_000_000 + + delta.seconds * 1_000_000 + + delta.microseconds + ) + return calc_int_int(operator.mul, microseconds, 1000) raise TypeError(type(delta)) -@cython.overflowcheck(True) cdef object ensure_td64ns(object ts): """ Overflow-safe implementation of td64.astype("m8[ns]") @@ -247,25 +244,14 @@ cdef object ensure_td64ns(object ts): str unitstr td64_unit = get_datetime64_unit(ts) - if ( - td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns - and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC - ): - unitstr = npy_unit_to_abbrev(td64_unit) - - td64_value = get_timedelta64_value(ts) - - mult = precision_from_unit(unitstr)[0] - try: - # NB: cython#1381 this cannot be *= - td64_value = td64_value * mult - except OverflowError as err: - msg = f"{ts} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" - raise OutOfBoundsTimedelta(msg) from err + if td64_unit in (NPY_DATETIMEUNIT.NPY_FR_ns, NPY_DATETIMEUNIT.NPY_FR_GENERIC): + return ts - return np.timedelta64(td64_value, "ns") + unitstr = npy_unit_to_abbrev(td64_unit) + mult = precision_from_unit(unitstr)[0] + td64_value = calc_int_int(operator.mul, get_timedelta64_value(ts), mult) - return ts + return np.timedelta64(td64_value, "ns") cdef convert_to_timedelta64(object ts, str unit): @@ -686,13 +672,27 @@ def _op_unary_method(func, name): return f -cpdef int64_t calculate(object op, object a, object b) except? -1: +cpdef int64_t calc_int_int(object op, object a, object b) except? -1: """ - Calculate op(a, b), raising if either operand or the resulting value cannot be - safely cast to an int64_t. 
+ Calculate op(a, b), raising if either operand or the result cannot be safely cast + to an int64_t. """ try: - return ops.calculate(op, a, b) + return ops.calc_int_int(op, a, b) + except OverflowError as ex: + msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from ex + + +cpdef int64_t calc_int_float(object op, object a, object b) except? -1: + """ + Calculate op(int, double), raising if any of the following aren't safe conversions: + - a to int64_t + - b to double + - result to int64_t + """ + try: + return ops.calc_int_float(op, a, b) except OverflowError as ex: msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" raise OutOfBoundsTimedelta(msg) from ex @@ -742,7 +742,7 @@ def _binary_op_method_timedeltalike(op, name): if self._reso != other._reso: raise NotImplementedError - result = calculate(op, self.value, other.value) + result = calc_int_int(op, self.value, other.value) if result == NPY_NAT: return NaT return _timedelta_from_value_and_reso(result, self._reso) @@ -1601,19 +1601,18 @@ class Timedelta(_Timedelta): __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') def __mul__(self, other): - if is_integer_object(other) or is_float_object(other): - if util.is_nan(other): - # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT - return NaT - - return _timedelta_from_value_and_reso( - (other * self.value), - reso=self._reso, - ) - - elif is_array(other): + if util.is_nan(other): + # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT + return NaT + if is_array(other): # ndarray-like return other * self.to_timedelta64() + if is_integer_object(other): + value = calc_int_int(operator.mul, self.value, other) + return _timedelta_from_value_and_reso(value, self._reso) + if is_float_object(other): + value = calc_int_float(operator.mul, self.value, other) + return _timedelta_from_value_and_reso(value, self._reso) return NotImplemented diff --git 
a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 77dbc133deb3b..590335f8e68db 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -1,6 +1,9 @@ """ Tests for scalar Timedelta arithmetic ops """ + +from __future__ import annotations + from datetime import ( datetime, timedelta, @@ -15,6 +18,7 @@ import pandas as pd from pandas import ( + NA, NaT, Timedelta, Timestamp, @@ -24,12 +28,128 @@ from pandas.core import ops -class TestTimedeltaAdditionSubtraction: - """ - Tests for Timedelta methods: +@pytest.fixture(name="tdlike_cls", params=(Timedelta, timedelta, np.timedelta64)) +def fixture_tdlike_cls(request) -> type: + return request.param + + +@pytest.fixture( + name="tdlike_or_offset_cls", + params=(Timedelta, timedelta, np.timedelta64, offsets.Nano), +) +def fixture_tdlike_or_offset_cls(request) -> type: + return request.param + + +@pytest.fixture(name="ten_days") +def fixture_ten_days() -> Timedelta: + return Timedelta(days=10) + + +@pytest.fixture(name="y2k", params=(Timestamp, np.datetime64, datetime.fromisoformat)) +def fixture_y2k(request): + return request.param("2000-01-01") + + +@pytest.fixture(name="one_day") +def fixture_one_day(tdlike_cls: type): + if tdlike_cls is np.timedelta64: + return np.timedelta64(1, "D") + return tdlike_cls(days=1) + + +@pytest.fixture( + name="na_value", + params=(None, np.nan, np.float64("NaN"), NaT, NA), + ids=("None", "np.nan", "np.float64('NaN')", "NaT", "NA"), +) +def fixture_na_value(request): + return request.param + + +@pytest.fixture(name="add_op", params=(operator.add, ops.radd)) +def fixture_add_op(request): + return request.param + + +@pytest.fixture(name="sub_op", params=(operator.sub, ops.rsub)) +def fixture_sub_op(request): + return request.param + + +@pytest.fixture( + name="add_or_sub", + params=(operator.add, ops.radd, operator.sub, ops.rsub), +) +def fixture_add_or_sub(request): + 
return request.param + + +@pytest.fixture(name="mul_op", params=(operator.mul, ops.rmul)) +def fixture_mul_op(request): + return request.param + + +@pytest.fixture(name="truediv_op", params=(operator.truediv, ops.rtruediv)) +def fixture_truediv_op(request): + return request.param + + +@pytest.fixture( + name="floor_mod_divmod_op", + params=( + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + divmod, + ops.rdivmod, + ), +) +def fixture_floor_mod_divmod_op(request): + return request.param + + +@pytest.fixture(name="td_overflow_msg") +def fixture_td_overflow_msg() -> str: + return re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) + + +@pytest.fixture(name="invalid_op_msg") +def fixture_invalid_op_msg() -> str: + messages = ( + "cannot use operands with types", + "Concatenation operation is not implemented for NumPy arrays", + "cannot perform", + "not supported between instances of 'Timedelta' and ", + re.escape("unsupported operand type(s)"), + ) + return "|".join(messages) + + +xfail_type_error = pytest.mark.xfail( + reason="unsupported", + raises=TypeError, + strict=True, +) + - __add__, __radd__, - __sub__, __rsub__ +def test_binary_ops_not_implemented_for_arbitrary_types( + ten_days: Timedelta, + invalid_op_msg: str, + all_binary_operators, +): + if all_binary_operators not in (operator.eq, operator.ne): + with pytest.raises(TypeError, match=invalid_op_msg): + all_binary_operators(ten_days, object()) + + +class TestAdditionSubtractionScalar: + """ + Tests for Timedelta.{__add__,__radd__,__sub__,__rsub__} where second operand is a + scalar. 
""" @pytest.mark.parametrize( @@ -42,7 +162,7 @@ class TestTimedeltaAdditionSubtraction: offsets.Second(10), ], ) - def test_td_add_sub_ten_seconds(self, ten_seconds): + def test_add_sub_ten_seconds(self, ten_seconds): # GH#6808 base = Timestamp("20130101 09:01:12.123456") expected_add = Timestamp("20130101 09:01:22.123456") @@ -64,7 +184,7 @@ def test_td_add_sub_ten_seconds(self, ten_seconds): offsets.Day() + offsets.Second(10), ], ) - def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): + def test_add_sub_one_day_ten_seconds(self, one_day_ten_secs): # GH#6808 base = Timestamp("20130102 09:01:12.123456") expected_add = Timestamp("20130103 09:01:22.123456") @@ -76,184 +196,126 @@ def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): result = base - one_day_ten_secs assert result == expected_sub - @pytest.mark.parametrize("op", [operator.add, ops.radd]) - def test_td_add_datetimelike_scalar(self, op): + @pytest.mark.parametrize("value", (2, 2.0), ids=("int", "float")) + def test_add_or_sub_numeric_raises( + self, + ten_days: Timedelta, + add_or_sub, + value, + invalid_op_msg: str, + ): + with pytest.raises(TypeError, match=invalid_op_msg): + add_or_sub(ten_days, value) + + def test_add_datetimelike(self, ten_days: Timedelta, add_op, y2k): # GH#19738 - td = Timedelta(10, unit="d") + result = add_op(y2k, ten_days) + expected = Timestamp("2000-01-11") - result = op(td, datetime(2016, 1, 1)) - if op is operator.add: + if type(y2k) != datetime and add_op != ops.radd: # datetime + Timedelta does _not_ call Timedelta.__radd__, # so we get a datetime back instead of a Timestamp assert isinstance(result, Timestamp) - assert result == Timestamp(2016, 1, 11) - - result = op(td, Timestamp("2018-01-12 18:09")) - assert isinstance(result, Timestamp) - assert result == Timestamp("2018-01-22 18:09") - - result = op(td, np.datetime64("2018-01-12")) - assert isinstance(result, Timestamp) - assert result == Timestamp("2018-01-22") - - result = op(td, NaT) - 
assert result is NaT - - def test_td_add_timestamp_overflow(self): - msg = msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timestamp("1700-01-01") + timedelta(days=13 * 19999) - - @pytest.mark.parametrize("op", [operator.add, ops.radd]) - def test_td_add_td(self, op): - td = Timedelta(10, unit="d") - - result = op(td, Timedelta(days=10)) - assert isinstance(result, Timedelta) - assert result == Timedelta(days=20) - - @pytest.mark.parametrize("op", [operator.add, ops.radd]) - def test_td_add_pytimedelta(self, op): - td = Timedelta(10, unit="d") - result = op(td, timedelta(days=9)) - assert isinstance(result, Timedelta) - assert result == Timedelta(days=19) - - @pytest.mark.parametrize("op", [operator.add, ops.radd]) - def test_td_add_timedelta64(self, op): - td = Timedelta(10, unit="d") - result = op(td, np.timedelta64(-4, "D")) - assert isinstance(result, Timedelta) - assert result == Timedelta(days=6) + assert result == expected - @pytest.mark.parametrize("op", [operator.add, ops.radd]) - def test_td_add_offset(self, op): - td = Timedelta(10, unit="d") + def test_sub_datetimelike(self, ten_days: Timedelta, y2k, invalid_op_msg: str): + assert y2k - ten_days == Timestamp("1999-12-22") - result = op(td, offsets.Hour(6)) - assert isinstance(result, Timedelta) - assert result == Timedelta(days=10, hours=6) + with pytest.raises(TypeError, match=invalid_op_msg): + ten_days - y2k - def test_td_sub_td(self): - td = Timedelta(10, unit="d") - expected = Timedelta(0, unit="ns") - result = td - td + def test_add_timedeltalike(self, ten_days: Timedelta, add_op, one_day): + result = add_op(ten_days, one_day) + expected = Timedelta(days=11) assert isinstance(result, Timedelta) assert result == expected - def test_td_sub_pytimedelta(self): - td = Timedelta(10, 
unit="d") - expected = Timedelta(0, unit="ns") - - result = td - td.to_pytimedelta() + def test_sub_timedeltalike(self, ten_days: Timedelta, one_day, sub_op): + result = sub_op(ten_days, one_day) + expected = Timedelta(days=9) if sub_op is operator.sub else Timedelta(days=-9) assert isinstance(result, Timedelta) assert result == expected - result = td.to_pytimedelta() - td + def test_add_offset(self, ten_days: Timedelta, add_op): + result = add_op(ten_days, offsets.Hour(6)) + expected = Timedelta(days=10, hours=6) assert isinstance(result, Timedelta) assert result == expected - def test_td_sub_timedelta64(self): - td = Timedelta(10, unit="d") - expected = Timedelta(0, unit="ns") - - result = td - td.to_timedelta64() - assert isinstance(result, Timedelta) - assert result == expected + def test_sub_offset(self, ten_days: Timedelta, sub_op): + result = sub_op(ten_days, offsets.Hour(1)) + if sub_op is operator.sub: + expected = Timedelta(hours=239) + else: + expected = Timedelta(hours=-239) - result = td.to_timedelta64() - td assert isinstance(result, Timedelta) assert result == expected - def test_td_sub_nat(self): - # In this context pd.NaT is treated as timedelta-like - td = Timedelta(10, unit="d") - result = td - NaT + def test_with_timedeltadlike_raises_for_any_result_above_td_max( + self, + tdlike_or_offset_cls, + td_overflow_msg: str, + ): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta.max + tdlike_or_offset_cls(1) + + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta.max - (tdlike_or_offset_cls(-1)) + + def test_no_error_for_result_1ns_below_td_min(self): + assert Timedelta.min + Timedelta(-1, "ns") is NaT + assert offsets.Nano(-1) + Timedelta.min is NaT + assert Timedelta.min - np.timedelta64(1, "ns") is NaT + + def test_raises_for_any_result_2ns_below_td_min( + self, + tdlike_or_offset_cls: type, + td_overflow_msg: str, + ): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + 
Timedelta.min + tdlike_or_offset_cls(-2) + + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta.min - tdlike_or_offset_cls(2) + + def test_add_or_sub_na(self, request, ten_days: Timedelta, add_or_sub, na_value): + if na_value is NA: + request.applymarker(xfail_type_error) + result = add_or_sub(ten_days, na_value) assert result is NaT - def test_td_sub_td64_nat(self): - td = Timedelta(10, unit="d") - td_nat = np.timedelta64("NaT") - result = td - td_nat - assert result is NaT - - result = td_nat - td - assert result is NaT - - def test_td_sub_offset(self): - td = Timedelta(10, unit="d") - result = td - offsets.Hour(1) - assert isinstance(result, Timedelta) - assert result == Timedelta(239, unit="h") - - def test_td_add_sub_numeric_raises(self): - td = Timedelta(10, unit="d") - msg = "unsupported operand type" - for other in [2, 2.0, np.int64(2), np.float64(2)]: - with pytest.raises(TypeError, match=msg): - td + other - with pytest.raises(TypeError, match=msg): - other + td - with pytest.raises(TypeError, match=msg): - td - other - with pytest.raises(TypeError, match=msg): - other - td - - def test_td_add_sub_int_ndarray(self): - td = Timedelta("1 day") - other = np.array([1]) - - msg = r"unsupported operand type\(s\) for \+: 'Timedelta' and 'int'" - with pytest.raises(TypeError, match=msg): - td + np.array([1]) - - msg = "|".join( - [ - ( - r"unsupported operand type\(s\) for \+: 'numpy.ndarray' " - "and 'Timedelta'" - ), - # This message goes on to say "Please do not rely on this error; - # it may not be given on all Python implementations" - "Concatenation operation is not implemented for NumPy arrays", - ] - ) - with pytest.raises(TypeError, match=msg): - other + td - msg = r"unsupported operand type\(s\) for -: 'Timedelta' and 'int'" - with pytest.raises(TypeError, match=msg): - td - other - msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timedelta'" - with pytest.raises(TypeError, match=msg): - other - td - - def 
test_td_rsub_nat(self): - td = Timedelta(10, unit="d") - result = NaT - td - assert result is NaT - - result = np.datetime64("NaT") - td - assert result is NaT +class TestAdditionSubtractionBox: + """ + Tests for Timedelta.{__add__,__radd__,__sub__,__rsub__} where second operand is a + Array/Index/Series/DataFrame. + """ - def test_td_rsub_offset(self): - result = offsets.Hour(1) - Timedelta(10, unit="d") - assert isinstance(result, Timedelta) - assert result == Timedelta(-239, unit="h") + @pytest.mark.parametrize("value", (2, 2.0), ids=("int", "float")) + def test_add_or_sub_numeric_raises( + self, + ten_days: Timedelta, + add_or_sub, + box_with_array, + value, + invalid_op_msg: str, + ): + other = tm.box_expected([value], box_with_array) + with pytest.raises(TypeError, match=invalid_op_msg): + add_or_sub(ten_days, other) + + def test_add_datetimelike(self): + pass - def test_td_sub_timedeltalike_object_dtype_array(self): + def test_sub_from_datetimelike(self, ten_days: Timedelta, box_with_array): # GH#21980 - arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) - exp = np.array([Timestamp("20121231 9:01"), Timestamp("20121229 9:02")]) - res = arr - Timedelta("1D") - tm.assert_numpy_array_equal(res, exp) + other = tm.box_expected([np.datetime64("2000-01-11")], box_with_array) + expected = tm.box_expected([np.datetime64("2000-01-01")], box_with_array) + result = other - ten_days + tm.assert_equal(result, expected) - def test_td_sub_mixed_most_timedeltalike_object_dtype_array(self): + def test_sub_mixed_most_timedeltalike_object_dtype_array(self): # GH#21980 now = Timestamp("2021-11-09 09:54:00") arr = np.array([now, Timedelta("1D"), np.timedelta64(2, "h")]) @@ -267,48 +329,39 @@ def test_td_sub_mixed_most_timedeltalike_object_dtype_array(self): res = arr - Timedelta("1D") tm.assert_numpy_array_equal(res, exp) - def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self): + def test_rsub_mixed_most_timedeltalike_object_dtype_array(self, 
invalid_op_msg): # GH#21980 now = Timestamp("2021-11-09 09:54:00") arr = np.array([now, Timedelta("1D"), np.timedelta64(2, "h")]) - msg = r"unsupported operand type\(s\) for \-: 'Timedelta' and 'Timestamp'" - with pytest.raises(TypeError, match=msg): + with pytest.raises(TypeError, match=invalid_op_msg): Timedelta("1D") - arr - @pytest.mark.parametrize("op", [operator.add, ops.radd]) - def test_td_add_timedeltalike_object_dtype_array(self, op): + def test_add_timedeltalike_object_dtype_array(self, add_op): # GH#21980 arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) exp = np.array([Timestamp("20130102 9:01"), Timestamp("20121231 9:02")]) - res = op(arr, Timedelta("1D")) + res = add_op(arr, Timedelta("1D")) tm.assert_numpy_array_equal(res, exp) - @pytest.mark.parametrize("op", [operator.add, ops.radd]) - def test_td_add_mixed_timedeltalike_object_dtype_array(self, op): + def test_add_mixed_timedeltalike_object_dtype_array(self, add_op): # GH#21980 now = Timestamp("2021-11-09 09:54:00") arr = np.array([now, Timedelta("1D")]) exp = np.array([now + Timedelta("1D"), Timedelta("2D")]) - res = op(arr, Timedelta("1D")) + res = add_op(arr, Timedelta("1D")) tm.assert_numpy_array_equal(res, exp) - def test_td_add_sub_td64_ndarray(self): - td = Timedelta("1 day") - - other = np.array([td.to_timedelta64()]) - expected = np.array([Timedelta("2 Days").to_timedelta64()]) - - result = td + other - tm.assert_numpy_array_equal(result, expected) - result = other + td + def test_add_td64_ndarray(self, ten_days: Timedelta, add_op): + result = add_op(ten_days, np.array([np.timedelta64(1, "D")])) + expected = np.array([Timedelta(days=11).to_timedelta64()]) tm.assert_numpy_array_equal(result, expected) - result = td - other - tm.assert_numpy_array_equal(result, expected * 0) - result = other - td - tm.assert_numpy_array_equal(result, expected * 0) + def test_sub_td64_ndarray(self, ten_days: Timedelta, sub_op): + result = sub_op(ten_days, np.array([np.timedelta64(10, 
"D")])) + expected = np.array([0], dtype="timedelta64[ns]") + tm.assert_numpy_array_equal(result, expected) - def test_td_add_sub_dt64_ndarray(self): + def test_add_sub_dt64_ndarray(self): td = Timedelta("1 day") other = pd.to_datetime(["2000-01-01"]).values @@ -320,305 +373,296 @@ def test_td_add_sub_dt64_ndarray(self): tm.assert_numpy_array_equal(-td + other, expected) tm.assert_numpy_array_equal(other - td, expected) + def test_na(self): + pass + -class TestTimedeltaMultiplicationDivision: +class TestMultiplicationScalar: """ - Tests for Timedelta methods: - - __mul__, __rmul__, - __div__, __rdiv__, - __truediv__, __rtruediv__, - __floordiv__, __rfloordiv__, - __mod__, __rmod__, - __divmod__, __rdivmod__ + Tests for Timedelta.{__mul__,__rmul__} where second operand is a scalar. """ - # --------------------------------------------------------------- - # Timedelta.__mul__, __rmul__ - @pytest.mark.parametrize( - "td_nat", [NaT, np.timedelta64("NaT", "ns"), np.timedelta64("NaT")] + "factor,expected", + ((2, 20), (1.5, 15), (-1, -10), (-1, -10)), ) - @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) - def test_td_mul_nat(self, op, td_nat): - # GH#19819 - td = Timedelta(10, unit="d") - typs = "|".join(["numpy.timedelta64", "NaTType", "Timedelta"]) - msg = "|".join( - [ - rf"unsupported operand type\(s\) for \*: '{typs}' and '{typs}'", - r"ufunc '?multiply'? 
cannot use operands with types", - ] - ) - with pytest.raises(TypeError, match=msg): - op(td, td_nat) - - @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")]) - @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) - def test_td_mul_nan(self, op, nan): - # np.float64('NaN') has a 'dtype' attr, avoid treating as array - td = Timedelta(10, unit="d") - result = op(td, nan) - assert result is NaT - - @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) - def test_td_mul_scalar(self, op): + def test_numeric(self, ten_days: Timedelta, mul_op, factor, expected): # GH#19738 - td = Timedelta(minutes=3) - - result = op(td, 2) - assert result == Timedelta(minutes=6) - - result = op(td, 1.5) - assert result == Timedelta(minutes=4, seconds=30) - - assert op(td, np.nan) is NaT - - assert op(-1, td).value == -1 * td.value - assert op(-1.0, td).value == -1.0 * td.value + result = mul_op(ten_days, factor) + assert result == Timedelta(expected, "D") + assert isinstance(result, Timedelta) - msg = "unsupported operand type" - with pytest.raises(TypeError, match=msg): - # timedelta * datetime is gibberish - op(td, Timestamp(2016, 1, 2)) + @pytest.mark.parametrize("value", (Timestamp("2020-01-02"), Timedelta(1))) + def test_datetimelike_or_timedeltalike_raises( + self, + ten_days: Timedelta, + mul_op, + value, + invalid_op_msg: str, + ): + # timedelta * datetime is gibberish, as is multiplying by another timedelta + with pytest.raises(TypeError, match=invalid_op_msg): + mul_op(ten_days, value) + + def test_offset_raises(self): + pass - with pytest.raises(TypeError, match=msg): - # invalid multiply with another timedelta - op(td, td) + @pytest.mark.parametrize("value", (Timedelta.min, Timedelta.max)) + def test_raises_for_overflow(self, mul_op, td_overflow_msg: str, value: Timedelta): + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + mul_op(value, 2) - def test_td_mul_numeric_ndarray(self): - td = Timedelta("1 day") - other = np.array([2]) 
- expected = np.array([Timedelta("2 Days").to_timedelta64()]) + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + mul_op(value, 1.1) - result = td * other - tm.assert_numpy_array_equal(result, expected) + def test_na(self, request, ten_days: Timedelta, mul_op, na_value): + if na_value is None or na_value is NaT or na_value is NA: + request.applymarker(xfail_type_error) + result = mul_op(ten_days, na_value) + assert result is NaT - result = other * td - tm.assert_numpy_array_equal(result, expected) - def test_td_mul_td64_ndarray_invalid(self): - td = Timedelta("1 day") - other = np.array([Timedelta("2 Days").to_timedelta64()]) +class TestMultiplicationBox: + """ + Tests for Timedelta.{__mul__,__rmul__} where second operand is a + Array/Index/Series/DataFrame. + """ - msg = ( - "ufunc '?multiply'? cannot use operands with types " - r"dtype\(' right - - assert not left == right - assert left != right diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index be2d7a024154a..e4aa709d054b4 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -3,6 +3,7 @@ timedelta, timezone, ) +import re import numpy as np import pytest @@ -39,10 +40,8 @@ def test_overflow_offset_raises(self): stamp = Timestamp("2017-01-13 00:00:00") offset_overflow = 20169940 * offsets.Day(1) - msg = ( - "the add operation between " - r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " - "will overflow" + msg = re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" ) lmsg = "|".join( ["Python int too large to convert to C long", "int too big to convert"] @@ -51,7 +50,7 @@ def test_overflow_offset_raises(self): with pytest.raises(OverflowError, match=lmsg): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): offset_overflow + stamp with 
pytest.raises(OverflowError, match=lmsg): @@ -66,7 +65,7 @@ def test_overflow_offset_raises(self): with pytest.raises(OverflowError, match=lmsg): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): offset_overflow + stamp with pytest.raises(OverflowError, match=lmsg): From a72d4e07eeefd874759e3441a07d8734beb21ad6 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Fri, 6 May 2022 18:12:41 -0700 Subject: [PATCH 10/18] more test cleanup --- .../tests/scalar/timedelta/test_arithmetic.py | 493 +++++++----------- 1 file changed, 184 insertions(+), 309 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 590335f8e68db..1632cd78dc4f2 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -392,8 +392,8 @@ def test_numeric(self, ten_days: Timedelta, mul_op, factor, expected): assert result == Timedelta(expected, "D") assert isinstance(result, Timedelta) - @pytest.mark.parametrize("value", (Timestamp("2020-01-02"), Timedelta(1))) - def test_datetimelike_or_timedeltalike_raises( + @pytest.mark.parametrize("value", (Timedelta.min, Timedelta.max, offsets.Day(1))) + def test_raises_for_datetimelike_timedeltalike_offset( self, ten_days: Timedelta, mul_op, @@ -404,16 +404,11 @@ def test_datetimelike_or_timedeltalike_raises( with pytest.raises(TypeError, match=invalid_op_msg): mul_op(ten_days, value) - def test_offset_raises(self): - pass - @pytest.mark.parametrize("value", (Timedelta.min, Timedelta.max)) - def test_raises_for_overflow(self, mul_op, td_overflow_msg: str, value: Timedelta): - with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): - mul_op(value, 2) - + @pytest.mark.parametrize("factor", (1.01, 2), ids=("int", "float")) + def test_raises_for_overflow(self, value, mul_op, factor, td_overflow_msg: str): with pytest.raises(OutOfBoundsTimedelta, 
match=td_overflow_msg): - mul_op(value, 1.1) + mul_op(value, factor) def test_na(self, request, ten_days: Timedelta, mul_op, na_value): if na_value is None or na_value is NaT or na_value is NA: @@ -431,15 +426,27 @@ class TestMultiplicationBox: @pytest.mark.parametrize("factor,expected", ((2, 20), (1.5, 15))) def test_numeric(self, ten_days, mul_op, factor, expected, box_with_array): other = tm.box_expected([factor], box_with_array) + result = mul_op(ten_days, other) expected = tm.box_expected( [Timedelta(expected, "D").to_timedelta64()], box_with_array, ) - result = mul_op(ten_days, other) tm.assert_equal(result, expected) - @pytest.mark.parametrize("value", (Timestamp.min, Timedelta.max)) - def test_datetimelike_or_timedeltalike_raises( + @pytest.mark.xfail(reason="no overflow check", raises=AssertionError) + @pytest.mark.parametrize("factor", (1.01, 2), ids=("int", "float")) + def test_returns_nat_if_result_overflows(self, mul_op, factor, box_with_array): + numeric_box = tm.box_expected((1, factor), box_with_array, transpose=False) + result = mul_op(pd.Timedelta.max, numeric_box) + expected = tm.box_expected( + (pd.Timedelta.max, NaT), + box_with_array, + transpose=False, + ) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("value", (Timedelta.min, Timedelta.max, offsets.Day(1))) + def test_raises_for_datetimelike_timedeltalike_offset( self, ten_days: Timedelta, mul_op, @@ -447,60 +454,124 @@ def test_datetimelike_or_timedeltalike_raises( box_with_array, invalid_op_msg: str, ): - box = tm.box_expected([value], box_with_array) + other = tm.box_expected([value], box_with_array) with pytest.raises(TypeError, match=invalid_op_msg): - mul_op(ten_days, box) - - def test_offset_raises(self): - pass - - def test_raises_for_overflow(self): - pass + mul_op(ten_days, other) def test_na(self): - pass + ... -class TestTrueDivisionScalar: +class TestDivisionScalar: """ - Tests for Timedelta.{__truediv__,__rtruediv__} where second operand is a scalar. 
+ Tests against the following Timedelta methods, where second operand is a scalar: + + __truediv__,__rtrueidv + __floordiv__,__rfloordiv__ + __mod__,__rmod__ + __divmod__,__rdivmod__ """ - def test_truediv_numeric(self, ten_days: Timedelta, any_real_numpy_dtype): + @pytest.mark.parametrize( + ("div_op", "divisor", "expected"), + ( + (operator.truediv, 2.0, Timedelta(days=5)), + (operator.floordiv, 3, Timedelta(days=3, hours=8)), + (operator.mod, 11, Timedelta(nanoseconds=6)), + (divmod, 3, (Timedelta(days=3, hours=8), Timedelta(0))), + ), + ids=("truediv", "floordiv", "mod", "divmod"), + ) + def test_div_numeric( + self, + ten_days: Timedelta, + div_op, + divisor, + any_real_numpy_dtype, + expected, + ): # GH#19738 - scalar = np.dtype(any_real_numpy_dtype).type(2.0) - result = ten_days / scalar - assert isinstance(result, Timedelta) - assert result == Timedelta(days=5) + scalar = np.dtype(any_real_numpy_dtype).type(divisor) + result = div_op(ten_days, scalar) - def test_rtruediv_numeric_raises( + assert result == expected + if div_op is divmod: + assert all(isinstance(r, Timedelta) for r in result) + else: + assert isinstance(result, Timedelta) + + @pytest.mark.parametrize( + "rdiv_op", + (ops.rtruediv, ops.rfloordiv, ops.rmod, ops.rdivmod), + ) + def test_rdiv_numeric_raises( self, ten_days: Timedelta, - invalid_op_msg: str, + rdiv_op, any_real_numpy_dtype, + invalid_op_msg: str, ): - scalar = np.dtype(any_real_numpy_dtype).type(2.0) + scalar = np.dtype(any_real_numpy_dtype).type(1) with pytest.raises(TypeError, match=invalid_op_msg): - scalar / ten_days + rdiv_op(ten_days, scalar) + @pytest.mark.parametrize( + "any_div_op", + ( + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + divmod, + ops.rdivmod, + ), + ) def test_datetimelike_raises( self, ten_days: Timedelta, - truediv_op, + any_div_op, y2k, invalid_op_msg: str, ): with pytest.raises(TypeError, match=invalid_op_msg): - truediv_op(ten_days, y2k) + 
any_div_op(ten_days, y2k) - def test_timedeltalike(self, ten_days: Timedelta, one_day): + @pytest.mark.parametrize( + ("div_op", "expected"), + ( + (operator.truediv, 10), + (operator.floordiv, 10), + (operator.mod, Timedelta(0)), + (divmod, (10, Timedelta(0))), + (ops.rtruediv, 0.1), + (ops.rfloordiv, 0), + (ops.rmod, Timedelta(days=1)), + (ops.rdivmod, (0, Timedelta(days=1))), + ), + ) + def test_timedeltalike(self, ten_days: Timedelta, div_op, one_day, expected): # GH#19738 - assert ten_days / one_day == 10 - assert one_day / ten_days == 0.1 + result = div_op(ten_days, one_day) + assert result == expected - def test_offset(self, ten_days: Timedelta): - assert ten_days / offsets.Hour(12) == 20 - assert offsets.Hour(12) / ten_days == 0.05 + @pytest.mark.parametrize( + ("div_op", "expected"), + ( + (operator.truediv, 10), + (operator.floordiv, 10), + (operator.mod, Timedelta(0)), + (divmod, (10, Timedelta(0))), + (ops.rtruediv, 0.1), + (ops.rfloordiv, 0), + (ops.rmod, Timedelta(days=1)), + (ops.rdivmod, (0, Timedelta(days=1))), + ), + ) + def test_offset(self, ten_days: Timedelta, div_op, expected): + result = div_op(ten_days, offsets.Day(1)) + assert result == expected def test_na(self, request, ten_days: Timedelta, truediv_op, na_value): expected = NaT @@ -513,271 +584,6 @@ def test_na(self, request, ten_days: Timedelta, truediv_op, na_value): result = truediv_op(ten_days, na_value) assert result is expected - -class TestTrueDivisionBox: - """ - Tests for Timedelta.{__floordiv__,__rfloordiv__,__truediv__,__rtruediv__} where - second operand is a Array/Index/Series/DataFrame. 
- """ - - def test_truediv_numeric(self, ten_days: Timedelta, any_real_numpy_dtype): - # GH#19738 - scalar = np.dtype(any_real_numpy_dtype).type(2.0) - result = ten_days / scalar - assert isinstance(result, Timedelta) - assert result == Timedelta(days=5) - - def test_rtruediv_numeric_raises( - self, - ten_days: Timedelta, - invalid_op_msg: str, - any_real_numpy_dtype, - ): - scalar = np.dtype(any_real_numpy_dtype).type(2.0) - with pytest.raises(TypeError, match=invalid_op_msg): - scalar / ten_days - - def test_datetimelike_raises( - self, - ten_days: Timedelta, - truediv_op, - y2k, - box_with_array, - invalid_op_msg: str, - ): - other = tm.box_expected((y2k,), box_with_array) - with pytest.raises(TypeError, match=invalid_op_msg): - truediv_op(ten_days, other) - - def test_timedeltalike( - self, - ten_days: Timedelta, - truediv_op, - tdlike_cls, - box_with_array, - ): - # TODO: - elem = tdlike_cls(days=10) if tdlike_cls is timedelta else tdlike_cls(10, "D") - other = tm.box_expected((elem,), box_with_array) - - if box_with_array is pd.array: - expected = np.array((1.0,)) - else: - expected = tm.box_expected((1.0,), box_with_array) - - result = truediv_op(ten_days, other) - tm.assert_equal(result, expected) - - def test_offset(self, ten_days: Timedelta): - ... - - def test_na(self, request, ten_days: Timedelta, truediv_op, na_value): - ... - - -class TestFloorModuloDivisionScalar: - """ - Timedelta.{__floordiv__,__rfloordiv__,__mod__,__rmod__,__divmod__,__rdivmod__} tests - where second operand is a scalar. 
- """ - - def test_floordiv_numeric(self): - pass - - def test_rfloordiv_numeric( - self, - ten_days: Timedelta, - any_real_numpy_dtype, - invalid_op_msg: str, - ): - # int32 deprecated GH#19761, enforced GH#29797 - scalar = np.dtype(any_real_numpy_dtype).type(1.0) - assert ten_days.__rfloordiv__(scalar) is NotImplemented - with pytest.raises(TypeError, match=invalid_op_msg): - scalar // ten_days - - def test_mod_numeric(self): - # GH#19365 - td = Timedelta(hours=37) - - # Numeric Others - result = td % 2 - assert isinstance(result, Timedelta) - assert result == Timedelta(0) - - result = td % 1e12 - assert isinstance(result, Timedelta) - assert result == Timedelta(minutes=3, seconds=20) - - result = td % int(1e12) - assert isinstance(result, Timedelta) - assert result == Timedelta(minutes=3, seconds=20) - - def test_rmod_numeric(self): - # GH#19365 - td = Timedelta(minutes=3) - - msg = "unsupported operand" - with pytest.raises(TypeError, match=msg): - Timestamp("2018-01-22") % td - - with pytest.raises(TypeError, match=msg): - 15 % td - - with pytest.raises(TypeError, match=msg): - 16.0 % td - - msg = "Invalid dtype int" - with pytest.raises(TypeError, match=msg): - np.array([22, 24]) % td - - def test_divmod_numeric(self): - # GH#19365 - td = Timedelta(days=2, hours=6) - - result = divmod(td, 53 * 3600 * 1e9) - assert result[0] == Timedelta(1, unit="ns") - assert isinstance(result[1], Timedelta) - assert result[1] == Timedelta(hours=1) - - assert result - result = divmod(td, np.nan) - assert result[0] is NaT - assert result[1] is NaT - - def test_rdivmod_numeric(self): - pass - - def test_datetimelike_raises( - self, - ten_days: Timedelta, - floor_mod_divmod_op, - y2k, - invalid_op_msg: str, - ): - # GH#18846 - with pytest.raises(TypeError, match=invalid_op_msg): - floor_mod_divmod_op(ten_days, y2k) - - def test_floordiv_timedeltalike(self): - pass - - def test_rfloordiv_timedeltalike(self): - # GH#18846 - td = Timedelta(hours=3, minutes=3) - scalar = 
Timedelta(hours=3, minutes=4) - - # scalar others - # x // Timedelta is defined only for timedelta-like x. int-like, - # float-like, and date-like, in particular, should all either - # a) raise TypeError directly or - # b) return NotImplemented, following which the reversed - # operation will raise TypeError. - assert td.__rfloordiv__(scalar) == 1 - assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2 - assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0 - - def test_mod_timedeltalike(self): - # GH#19365 - td = Timedelta(hours=37) - - # Timedelta-like others - result = td % Timedelta(hours=6) - assert isinstance(result, Timedelta) - assert result == Timedelta(hours=1) - - result = td % timedelta(minutes=60) - assert isinstance(result, Timedelta) - assert result == Timedelta(0) - - result = td % NaT - assert result is NaT - - def test_mod_timedelta64(self): - # GH#19365 - td = Timedelta(hours=37) - - result = td % np.timedelta64(2, "h") - assert isinstance(result, Timedelta) - assert result == Timedelta(hours=1) - - def test_rmod_timedeltalike(self): - # GH#19365 - td = Timedelta(minutes=3) - - result = timedelta(minutes=4) % td - assert isinstance(result, Timedelta) - assert result == Timedelta(minutes=1) - - def test_rmod_timedelta64(self): - # GH#19365 - td = Timedelta(minutes=3) - result = np.timedelta64(5, "m") % td - assert isinstance(result, Timedelta) - assert result == Timedelta(minutes=2) - - def test_divmod_timedeltalike(self): - # GH#19365 - td = Timedelta(days=2, hours=6) - - result = divmod(td, timedelta(days=1)) - assert result[0] == 2 - assert isinstance(result[1], Timedelta) - assert result[1] == Timedelta(hours=6) - - result = divmod(td, 54) - assert result[0] == Timedelta(hours=1) - assert isinstance(result[1], Timedelta) - assert result[1] == Timedelta(0) - - result = divmod(td, NaT) - assert np.isnan(result[0]) - assert result[1] is NaT - - def test_rdivmod_pytimedelta(self): - # GH#19365 - result = divmod(timedelta(days=2, 
hours=6), Timedelta(days=1)) - assert result[0] == 2 - assert isinstance(result[1], Timedelta) - assert result[1] == Timedelta(hours=6) - - def test_floordiv_offsets(self): - # GH#19738 - td = Timedelta(hours=3, minutes=4) - assert td // offsets.Hour(1) == 3 - - assert td // offsets.Minute(2) == 92 - - def test_rfloordiv_offsets(self): - # GH#19738 - assert offsets.Hour(1) // Timedelta(minutes=25) == 2 - - def test_mod_offset(self): - # GH#19365 - td = Timedelta(hours=37) - - result = td % offsets.Hour(5) - assert isinstance(result, Timedelta) - assert result == Timedelta(hours=2) - - def test_rmod_offset(self): - pass - - def test_divmod_offset(self): - # GH#19365 - td = Timedelta(days=2, hours=6) - - result = divmod(td, offsets.Hour(-4)) - assert result[0] == -14 - assert isinstance(result[1], Timedelta) - assert result[1] == Timedelta(hours=-2) - - def test_rdivmod_offset(self): - result = divmod(offsets.Hour(54), Timedelta(hours=-4)) - assert result[0] == -14 - assert isinstance(result[1], Timedelta) - assert result[1] == Timedelta(hours=-2) - def test_floordiv_na(self, request, ten_days: Timedelta, na_value): expected = NaT if na_value is NA: @@ -830,12 +636,81 @@ def test_rdivmod_na(self, request, ten_days: Timedelta, na_value): assert result == expected -class TestFloorModuloDivisionBox: +class TestDivisionBox: """ - Timedelta.{__floordiv__,__rfloordiv__,__mod__,__rmod__,__divmod__, __rdivmod__} - tests where second operand is a Array/Index/Series/DataFrame. 
+ Tests against the following Timedelta methods, where second operand is a + Array/Index/Series/DataFrame: + + __truediv__,__rtrueidv + __floordiv__,__rfloordiv__ + __mod__,__rmod__ + __divmod__,__rdivmod__ """ + def test_truediv_numeric(self, ten_days: Timedelta, any_real_numpy_dtype): + # GH#19738 + scalar = np.dtype(any_real_numpy_dtype).type(2.0) + result = ten_days / scalar + assert isinstance(result, Timedelta) + assert result == Timedelta(days=5) + + def test_rtruediv_numeric_raises( + self, + ten_days: Timedelta, + invalid_op_msg: str, + any_real_numpy_dtype, + ): + scalar = np.dtype(any_real_numpy_dtype).type(2.0) + with pytest.raises(TypeError, match=invalid_op_msg): + scalar / ten_days + + @pytest.mark.parametrize( + "any_div_op", + ( + operator.truediv, + ops.rtruediv, + operator.floordiv, + ops.rfloordiv, + operator.mod, + ops.rmod, + divmod, + ops.rdivmod, + ), + ) + def test_datetimelike_raises( + self, + ten_days: Timedelta, + any_div_op, + y2k, + box_with_array, + invalid_op_msg: str, + ): + other = tm.box_expected((y2k,), box_with_array) + with pytest.raises(TypeError, match=invalid_op_msg): + any_div_op(ten_days, other) + + def test_timedeltalike( + self, + ten_days: Timedelta, + truediv_op, + tdlike_cls, + box_with_array, + ): + # TODO: + elem = tdlike_cls(days=10) if tdlike_cls is timedelta else tdlike_cls(10, "D") + other = tm.box_expected((elem,), box_with_array) + + if box_with_array is pd.array: + expected = np.array((1.0,)) + else: + expected = tm.box_expected((1.0,), box_with_array) + + result = truediv_op(ten_days, other) + tm.assert_equal(result, expected) + + def test_offset(self, ten_days: Timedelta): + ... 
+ def test_floordiv_numeric_series(self): # GH#18846 td = Timedelta(hours=3, minutes=4) From 7a7418ad85487a952cccb286bbc72e5c36ae5808 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sat, 7 May 2022 20:28:21 -0700 Subject: [PATCH 11/18] handle older and newer cython semantics --- pandas/_libs/tslibs/timestamps.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index bd6c6baf0496c..555ad4af57bea 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -390,6 +390,9 @@ cdef class _Timestamp(ABCTimestamp): return NotImplemented def __sub__(self, other): + # nb: counterintuitive semantics of __sub__, __rsub__ for cython < 3.x + # github.com/cython/cython/blob/2795a4/docs/src/userguide/special_methods.rst#arithmetic-methods + # GH#28286 if isinstance(self, _Timestamp) and self._reso != NPY_FR_ns: raise NotImplementedError(self._reso) @@ -445,8 +448,6 @@ cdef class _Timestamp(ABCTimestamp): # method and return stdlib timedelta object pass elif is_datetime64_object(self): - # GH#28286 cython semantics for __rsub__, `other` is actually - # the Timestamp # TODO(cython3): remove this, this moved to __rsub__ return type(other)(self) - other @@ -459,7 +460,7 @@ cdef class _Timestamp(ABCTimestamp): if PyDateTime_Check(other): try: return type(self)(other) - self - except (OverflowError, OutOfBoundsDatetime) as err: + except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err: # We get here in stata tests, fall back to stdlib datetime # method and return stdlib timedelta object pass From b5e62d6a7b90d2aa33bcef07e4cd63375a41d9f9 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sat, 7 May 2022 21:10:11 -0700 Subject: [PATCH 12/18] misc. 
small fixes --- pandas/tests/libs/test_ops.py | 15 +++- .../tests/scalar/timedelta/test_arithmetic.py | 70 ++++++++++++------- 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/pandas/tests/libs/test_ops.py b/pandas/tests/libs/test_ops.py index a334e06212800..4a8a86d5eca15 100644 --- a/pandas/tests/libs/test_ops.py +++ b/pandas/tests/libs/test_ops.py @@ -1,4 +1,5 @@ import operator +from platform import architecture import numpy as np import pytest @@ -17,12 +18,12 @@ def fixture_int_min() -> int: @pytest.fixture(name="float_max", scope="module") -def fixture_float_max() -> int: +def fixture_float_max() -> np.float64: return np.finfo(np.float64).max @pytest.fixture(name="float_min", scope="module") -def fixture_float_min() -> int: +def fixture_float_min() -> np.float64: return np.finfo(np.float64).min @@ -130,7 +131,15 @@ def test_raises_for_too_large_result( strict=True, ), ), - 1024.1, + pytest.param( + 1024.1, + marks=pytest.mark.xfail( + condition=architecture()[0] == "32bit", + reason="overflows earlier", + raises=pytest.fail.Exception, + strict=True, + ), + ), ), ) def test_raises_for_most_too_small_results( diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 1632cd78dc4f2..c8c61e8422bec 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -9,12 +9,16 @@ timedelta, ) import operator +from platform import architecture import re import numpy as np import pytest -from pandas._libs.tslibs import OutOfBoundsTimedelta +from pandas._libs.tslibs import ( + NaTType, + OutOfBoundsTimedelta, +) import pandas as pd from pandas import ( @@ -33,6 +37,7 @@ def fixture_tdlike_cls(request) -> type: return request.param +# Tick, too? 
@pytest.fixture( name="tdlike_or_offset_cls", params=(Timedelta, timedelta, np.timedelta64, offsets.Nano), @@ -148,8 +153,10 @@ def test_binary_ops_not_implemented_for_arbitrary_types( class TestAdditionSubtractionScalar: """ - Tests for Timedelta.{__add__,__radd__,__sub__,__rsub__} where second operand is a - scalar. + Tests against the following Timedelta methods, where second operand is a scalar: + + __add__,__radd__, + __sub__,__rsub__ """ @pytest.mark.parametrize( @@ -230,7 +237,7 @@ def test_add_timedeltalike(self, ten_days: Timedelta, add_op, one_day): assert isinstance(result, Timedelta) assert result == expected - def test_sub_timedeltalike(self, ten_days: Timedelta, one_day, sub_op): + def test_sub_timedeltalike(self, ten_days: Timedelta, sub_op, one_day): result = sub_op(ten_days, one_day) expected = Timedelta(days=9) if sub_op is operator.sub else Timedelta(days=-9) assert isinstance(result, Timedelta) @@ -252,7 +259,7 @@ def test_sub_offset(self, ten_days: Timedelta, sub_op): assert isinstance(result, Timedelta) assert result == expected - def test_with_timedeltadlike_raises_for_any_result_above_td_max( + def test_add_sub_tdlike_raises_for_any_result_above_td_max( self, tdlike_or_offset_cls, td_overflow_msg: str, @@ -263,12 +270,12 @@ def test_with_timedeltadlike_raises_for_any_result_above_td_max( with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): Timedelta.max - (tdlike_or_offset_cls(-1)) - def test_no_error_for_result_1ns_below_td_min(self): + def test_add_sub_tdlike_raises_no_error_for_result_1ns_below_td_min(self): assert Timedelta.min + Timedelta(-1, "ns") is NaT assert offsets.Nano(-1) + Timedelta.min is NaT assert Timedelta.min - np.timedelta64(1, "ns") is NaT - def test_raises_for_any_result_2ns_below_td_min( + def test_add_sub_tdlike_raises_for_any_result_2ns_below_td_min( self, tdlike_or_offset_cls: type, td_overflow_msg: str, @@ -288,8 +295,11 @@ def test_add_or_sub_na(self, request, ten_days: Timedelta, add_or_sub, na_value) 
class TestAdditionSubtractionBox: """ - Tests for Timedelta.{__add__,__radd__,__sub__,__rsub__} where second operand is a - Array/Index/Series/DataFrame. + Tests against the following Timedelta methods, where second operand is a + Array/Index/Series/DataFrame: + + __add__,__radd__, + __sub__,__rsub__ """ @pytest.mark.parametrize("value", (2, 2.0), ids=("int", "float")) @@ -379,7 +389,9 @@ def test_na(self): class TestMultiplicationScalar: """ - Tests for Timedelta.{__mul__,__rmul__} where second operand is a scalar. + Tests against the following Timedelta methods, where second operand is a scalar: + + __mul__,__rmul__ """ @pytest.mark.parametrize( @@ -419,8 +431,10 @@ def test_na(self, request, ten_days: Timedelta, mul_op, na_value): class TestMultiplicationBox: """ - Tests for Timedelta.{__mul__,__rmul__} where second operand is a - Array/Index/Series/DataFrame. + Tests against the following Timedelta methods, where second operand is a + Array/Index/Series/DataFrame: + + __mul__,__rmul__ """ @pytest.mark.parametrize("factor,expected", ((2, 20), (1.5, 15))) @@ -433,13 +447,17 @@ def test_numeric(self, ten_days, mul_op, factor, expected, box_with_array): ) tm.assert_equal(result, expected) - @pytest.mark.xfail(reason="no overflow check", raises=AssertionError) + @pytest.mark.xfail( + condition=architecture()[0] != "32bit", + reason="no overflow check", + raises=AssertionError, + ) @pytest.mark.parametrize("factor", (1.01, 2), ids=("int", "float")) def test_returns_nat_if_result_overflows(self, mul_op, factor, box_with_array): numeric_box = tm.box_expected((1, factor), box_with_array, transpose=False) - result = mul_op(pd.Timedelta.max, numeric_box) + result = mul_op(Timedelta.max, numeric_box) expected = tm.box_expected( - (pd.Timedelta.max, NaT), + (Timedelta.max, NaT), box_with_array, transpose=False, ) @@ -574,22 +592,25 @@ def test_offset(self, ten_days: Timedelta, div_op, expected): assert result == expected def test_na(self, request, ten_days: Timedelta, 
truediv_op, na_value): - expected = NaT - if na_value is NA or ( + expected: NaTType | float = NaT + + if na_value is None or na_value is NaT: + expected = np.nan + elif na_value is NA or ( truediv_op is ops.rtruediv and isinstance(na_value, float) ): request.applymarker(xfail_type_error) - elif na_value is None or na_value is NaT: - expected = np.nan + result = truediv_op(ten_days, na_value) assert result is expected def test_floordiv_na(self, request, ten_days: Timedelta, na_value): - expected = NaT - if na_value is NA: - request.applymarker(xfail_type_error) - elif na_value is None or na_value is NaT: + expected: NaTType | float = NaT + + if na_value is None or na_value is NaT: expected = np.nan + elif na_value is NA: + request.applymarker(xfail_type_error) result = ten_days // na_value assert result is expected @@ -618,7 +639,8 @@ def test_rmod_na(self, request, ten_days: Timedelta, na_value): assert result is NaT def test_divmod_na(self, request, ten_days: Timedelta, na_value): - expected = (NaT, NaT) + expected: tuple[NaTType | float, NaTType] = (NaT, NaT) + if na_value is None or na_value is NA: request.applymarker(xfail_type_error) elif na_value is NaT: From 42e597c3d081c26169ad1d0dafb3deea17a1b207 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Sun, 8 May 2022 15:28:25 -0700 Subject: [PATCH 13/18] DRY up tests --- pandas/_libs/tslibs/timedeltas.pyx | 92 +- .../tests/scalar/timedelta/test_arithmetic.py | 10 +- .../scalar/timedelta/test_constructors.py | 560 ------- pandas/tests/scalar/timedelta/test_formats.py | 44 - .../tests/scalar/timedelta/test_timedelta.py | 1382 ++++++++++------- 5 files changed, 900 insertions(+), 1188 deletions(-) delete mode 100644 pandas/tests/scalar/timedelta/test_constructors.py delete mode 100644 pandas/tests/scalar/timedelta/test_formats.py diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f04066e74eae8..16d77203d1503 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ 
b/pandas/_libs/tslibs/timedeltas.pyx @@ -308,6 +308,21 @@ cdef convert_to_timedelta64(object ts, str unit): return ts.astype("timedelta64[ns]") +cpdef to_timedelta64(object value, str unit): + """ + Wrapper around convert_to_timedelta64() that does overflow checks. + TODO: also construct non-nano + TODO: do all overflow-unsafe operations here + TODO: constrain unit to a more specific type + """ + with cython.overflowcheck(True): + try: + return convert_to_timedelta64(value, unit) + except OverflowError as ex: + msg = f"{value} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from ex + + @cython.boundscheck(False) @cython.wraparound(False) def array_to_timedelta64( @@ -1473,49 +1488,44 @@ class Timedelta(_Timedelta): ) if isinstance(value, str) and unit is not None: raise ValueError("unit must not be specified if the value is a str") - elif value is _no_input: - if not kwargs: - raise ValueError( - "cannot construct a Timedelta without a value/unit " - "or descriptive keywords (days,seconds....)" - ) - if not kwargs.keys() <= set(cls._allowed_kwargs): - raise ValueError( - "cannot construct a Timedelta from the passed arguments, " - f"allowed keywords are {cls._allowed_kwargs}" - ) + elif value is _no_input and not kwargs: + raise ValueError( + "cannot construct a Timedelta without a value/unit " + "or descriptive keywords (days,seconds....)" + ) + if not kwargs.keys() <= set(cls._allowed_kwargs): + raise ValueError( + "cannot construct a Timedelta from the passed arguments, " + f"allowed keywords are {cls._allowed_kwargs}" + ) - try: - # GH43764, convert any input to nanoseconds first, to ensure any potential - # nanosecond contributions from kwargs parsed as floats are included - kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) - if kwargs: - value = convert_to_timedelta64( - sum(( - kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000, - kwargs["days"] * 24 * 3600 * 
1_000_000_000, - kwargs["hours"] * 3600 * 1_000_000_000, - kwargs["minutes"] * 60 * 1_000_000_000, - kwargs["seconds"] * 1_000_000_000, - kwargs["milliseconds"] * 1_000_000, - kwargs["microseconds"] * 1_000, - kwargs["nanoseconds"], - )), - "ns", - ) - else: - if is_integer_object(value) or is_float_object(value): - unit = parse_timedelta_unit(unit) - else: - unit = "ns" - value = convert_to_timedelta64(value, unit) - except OverflowError as ex: - msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" - raise OutOfBoundsTimedelta(msg) from ex + # GH43764, convert any input to nanoseconds first, to ensure any potential + # nanosecond contributions from kwargs parsed as floats are included + kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) + if kwargs: + value = to_timedelta64( + sum(( + kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000, + kwargs["days"] * 24 * 3600 * 1_000_000_000, + kwargs["hours"] * 3600 * 1_000_000_000, + kwargs["minutes"] * 60 * 1_000_000_000, + kwargs["seconds"] * 1_000_000_000, + kwargs["milliseconds"] * 1_000_000, + kwargs["microseconds"] * 1_000, + kwargs["nanoseconds"], + )), + "ns", + ) else: - if is_td64nat(value): - return NaT - return _timedelta_from_value_and_reso(value.view("i8"), NPY_FR_ns) + if is_integer_object(value) or is_float_object(value): + unit = parse_timedelta_unit(unit) + else: + unit = "ns" + value = to_timedelta64(value, unit) + + if is_td64nat(value): + return NaT + return _timedelta_from_value_and_reso(value.view("i8"), NPY_FR_ns) def __setstate__(self, state): if len(state) == 1: diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index c8c61e8422bec..9870507b39e78 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -1,5 +1,6 @@ """ -Tests for scalar Timedelta arithmetic ops +Tests for arithmetic ops between a Timedelta 
scalar and another scalar, or a Timedelta +scalar and a Array/Index/Series/DataFrame. """ from __future__ import annotations @@ -9,7 +10,6 @@ timedelta, ) import operator -from platform import architecture import re import numpy as np @@ -447,11 +447,7 @@ def test_numeric(self, ten_days, mul_op, factor, expected, box_with_array): ) tm.assert_equal(result, expected) - @pytest.mark.xfail( - condition=architecture()[0] != "32bit", - reason="no overflow check", - raises=AssertionError, - ) + @pytest.mark.xfail(reason="no overflow check", raises=AssertionError, strict=True) @pytest.mark.parametrize("factor", (1.01, 2), ids=("int", "float")) def test_returns_nat_if_result_overflows(self, mul_op, factor, box_with_array): numeric_box = tm.box_expected((1, factor), box_with_array, transpose=False) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py deleted file mode 100644 index 38e1e294efe69..0000000000000 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ /dev/null @@ -1,560 +0,0 @@ -from datetime import timedelta -from itertools import ( - chain, - zip_longest, -) -import re - -import numpy as np -import pytest - -from pandas._libs.tslibs import OutOfBoundsTimedelta - -from pandas import ( - NA, - NaT, - Timedelta, - offsets, - to_timedelta, -) - -TD_KWARGS_UNITS = { - "weeks": ("w",), - "days": ("d", "day", "days"), - "hours": ("h", "hr", "hour", "hours"), - "minutes": ("m", "t", "min", "minute", "minutes"), - "seconds": ("s", "sec", "second", "seconds"), - "milliseconds": ("l", "ms", "milli", "millis", "millisecond", "milliseconds"), - "microseconds": ("u", "us", "µs", "micro", "micros", "microsecond", "microseconds"), - "nanoseconds": ("n", "ns", "nano", "nanos", "nanosecond", "nanoseconds"), -} -TD_MAX_PER_KWARG = { - "nanoseconds": Timedelta.max.value, - "microseconds": Timedelta.max.value // 1_000, - "milliseconds": Timedelta.max.value // 1_000_000, - "seconds": Timedelta.max.value // 
1_000_000_000, - "minutes": Timedelta.max.value // (1_000_000_000 * 60), - "hours": Timedelta.max.value // (1_000_000_000 * 60 * 60), - "days": Timedelta.max.value // (1_000_000_000 * 60 * 60 * 24), - "weeks": Timedelta.max.value // (1_000_000_000 * 60 * 60 * 24 * 7), -} -TD_MIN_PER_KWARG = { - "nanoseconds": Timedelta.min.value, - "microseconds": Timedelta.min.value // 1_000, - "milliseconds": Timedelta.min.value // 1_000_000, - "seconds": Timedelta.min.value // 1_000_000_000, - "minutes": Timedelta.min.value // (1_000_000_000 * 60), - "hours": Timedelta.min.value // (1_000_000_000 * 60 * 60), - "days": Timedelta.min.value // (1_000_000_000 * 60 * 60 * 24), - "weeks": Timedelta.min.value // (1_000_000_000 * 60 * 60 * 24 * 7), -} -TD_MAX_PER_UNIT = dict( - chain.from_iterable( - zip_longest(units, (TD_MAX_PER_KWARG[k],), fillvalue=TD_MAX_PER_KWARG[k]) - for k, units in TD_KWARGS_UNITS.items() - ) -) -TD_MIN_PER_UNIT = dict( - chain.from_iterable( - zip_longest(units, (TD_MIN_PER_KWARG[k],), fillvalue=TD_MIN_PER_KWARG[k]) - for k, units in TD_KWARGS_UNITS.items() - ) -) -TD_KWARGS_NP_TD64_UNITS = dict( - zip(TD_MAX_PER_KWARG, ("ns", "us", "ms", "s", "m", "h", "D", "W")) -) -NP_TD64_MAX_PER_UNIT = dict( - zip(("ns", "us", "ms", "s", "m", "h", "D", "W"), TD_MAX_PER_KWARG.values()) -) -NP_TD64_MIN_PER_UNIT = dict( - zip(("ns", "us", "ms", "s", "m", "h", "D", "W"), TD_MIN_PER_KWARG.values()) -) - - -skip_ns = lambda d: {k: v for k, v in d.items() if not k.startswith("n")} - - -def test_construction(): - expected = np.timedelta64(10, "D").astype("m8[ns]").view("i8") - assert Timedelta(10, unit="d").value == expected - assert Timedelta(10.0, unit="d").value == expected - assert Timedelta("10 days").value == expected - assert Timedelta(days=10).value == expected - assert Timedelta(days=10.0).value == expected - - expected += np.timedelta64(10, "s").astype("m8[ns]").view("i8") - assert Timedelta("10 days 00:00:10").value == expected - assert Timedelta(days=10, 
seconds=10).value == expected - assert Timedelta(days=10, milliseconds=10 * 1000).value == expected - assert Timedelta(days=10, microseconds=10 * 1000 * 1000).value == expected - - # rounding cases - assert Timedelta(82739999850000).value == 82739999850000 - assert "0 days 22:58:59.999850" in str(Timedelta(82739999850000)) - assert Timedelta(123072001000000).value == 123072001000000 - assert "1 days 10:11:12.001" in str(Timedelta(123072001000000)) - - # string conversion with/without leading zero - # GH#9570 - assert Timedelta("0:00:00") == timedelta(hours=0) - assert Timedelta("00:00:00") == timedelta(hours=0) - assert Timedelta("-1:00:00") == -timedelta(hours=1) - assert Timedelta("-01:00:00") == -timedelta(hours=1) - - # more strings & abbrevs - # GH#8190 - assert Timedelta("1 h") == timedelta(hours=1) - assert Timedelta("1 hour") == timedelta(hours=1) - assert Timedelta("1 hr") == timedelta(hours=1) - assert Timedelta("1 hours") == timedelta(hours=1) - assert Timedelta("-1 hours") == -timedelta(hours=1) - assert Timedelta("1 m") == timedelta(minutes=1) - assert Timedelta("1.5 m") == timedelta(seconds=90) - assert Timedelta("1 minute") == timedelta(minutes=1) - assert Timedelta("1 minutes") == timedelta(minutes=1) - assert Timedelta("1 s") == timedelta(seconds=1) - assert Timedelta("1 second") == timedelta(seconds=1) - assert Timedelta("1 seconds") == timedelta(seconds=1) - assert Timedelta("1 ms") == timedelta(milliseconds=1) - assert Timedelta("1 milli") == timedelta(milliseconds=1) - assert Timedelta("1 millisecond") == timedelta(milliseconds=1) - assert Timedelta("1 us") == timedelta(microseconds=1) - assert Timedelta("1 µs") == timedelta(microseconds=1) - assert Timedelta("1 micros") == timedelta(microseconds=1) - assert Timedelta("1 microsecond") == timedelta(microseconds=1) - assert Timedelta("1.5 microsecond") == Timedelta("00:00:00.000001500") - assert Timedelta("1 ns") == Timedelta("00:00:00.000000001") - assert Timedelta("1 nano") == 
Timedelta("00:00:00.000000001") - assert Timedelta("1 nanosecond") == Timedelta("00:00:00.000000001") - - # combos - assert Timedelta("10 days 1 hour") == timedelta(days=10, hours=1) - assert Timedelta("10 days 1 h") == timedelta(days=10, hours=1) - assert Timedelta("10 days 1 h 1m 1s") == timedelta( - days=10, hours=1, minutes=1, seconds=1 - ) - assert Timedelta("-10 days 1 h 1m 1s") == -timedelta( - days=10, hours=1, minutes=1, seconds=1 - ) - assert Timedelta("-10 days 1 h 1m 1s") == -timedelta( - days=10, hours=1, minutes=1, seconds=1 - ) - assert Timedelta("-10 days 1 h 1m 1s 3us") == -timedelta( - days=10, hours=1, minutes=1, seconds=1, microseconds=3 - ) - assert Timedelta("-10 days 1 h 1.5m 1s 3us") == -timedelta( - days=10, hours=1, minutes=1, seconds=31, microseconds=3 - ) - - # floats - expected = np.timedelta64(10, "s").astype("m8[ns]").view("i8") + np.timedelta64( - 500, "ms" - ).astype("m8[ns]").view("i8") - assert Timedelta(10.5, unit="s").value == expected - - # offset - assert to_timedelta(offsets.Hour(2)) == Timedelta(hours=2) - assert Timedelta(offsets.Hour(2)) == Timedelta(hours=2) - assert Timedelta(offsets.Second(2)) == Timedelta(seconds=2) - - # GH#11995: unicode - expected = Timedelta("1H") - result = Timedelta("1H") - assert result == expected - assert to_timedelta(offsets.Hour(2)) == Timedelta("0 days, 02:00:00") - - -@pytest.mark.parametrize("unit", ("ps", "ns")) -def test_from_np_td64_ignores_unit(unit: str): - """ - Ignore the unit, as it may cause silently overflows leading to incorrect results, - and in non-overflow cases is irrelevant GH#46827. 
- """ - td64 = np.timedelta64(NP_TD64_MAX_PER_UNIT["h"], "h") - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - assert Timedelta(td64, unit=unit) == Timedelta(td64) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(td64 * 2, unit=unit) - - -@pytest.mark.parametrize(("td_kwarg", "np_unit"), TD_KWARGS_NP_TD64_UNITS.items()) -@pytest.mark.parametrize( - "np_dtype", - (np.int64, np.int32, np.int16, np.float64, np.float32, np.float16), -) -def test_td_construction_with_np_dtypes(np_dtype: type, td_kwarg: str, np_unit: str): - # GH#8757: test construction with np dtypes - expected_ns = np.timedelta64(1, np_unit).astype("m8[ns]").view("i8") - assert Timedelta(**{td_kwarg: np_dtype(1)}).value == expected_ns - - -@pytest.mark.parametrize( - "val", - [ - "1s", - "-1s", - "1us", - "-1us", - "1 day", - "-1 day", - "-23:59:59.999999", - "-1 days +23:59:59.999999", - "-1ns", - "1ns", - "-23:59:59.999999999", - ], -) -def test_td_from_repr_roundtrip(val): - # round-trip both for string and value - td = Timedelta(val) - assert Timedelta(td.value) == td - - assert Timedelta(str(td)) == td - assert Timedelta(td._repr_base(format="all")) == td - assert Timedelta(td._repr_base()) == td - - -@pytest.mark.parametrize( - "fmt,exp", - [ - ( - "P6DT0H50M3.010010012S", - Timedelta( - days=6, - minutes=50, - seconds=3, - milliseconds=10, - microseconds=10, - nanoseconds=12, - ), - ), - ( - "P-6DT0H50M3.010010012S", - Timedelta( - days=-6, - minutes=50, - seconds=3, - milliseconds=10, - microseconds=10, - nanoseconds=12, - ), - ), - ("P4DT12H30M5S", Timedelta(days=4, hours=12, minutes=30, seconds=5)), - ("P0DT0H0M0.000000123S", Timedelta(nanoseconds=123)), - ("P0DT0H0M0.00001S", Timedelta(microseconds=10)), - ("P0DT0H0M0.001S", Timedelta(milliseconds=1)), - ("P0DT0H1M0S", Timedelta(minutes=1)), - ("P1DT25H61M61S", Timedelta(days=1, hours=25, minutes=61, seconds=61)), - ("PT1S", Timedelta(seconds=1)), - ("PT0S", 
Timedelta(seconds=0)), - ("P1WT0S", Timedelta(days=7, seconds=0)), - ("P1D", Timedelta(days=1)), - ("P1DT1H", Timedelta(days=1, hours=1)), - ("P1W", Timedelta(days=7)), - ("PT300S", Timedelta(seconds=300)), - ("P1DT0H0M00000000000S", Timedelta(days=1)), - ("PT-6H3M", Timedelta(hours=-6, minutes=3)), - ("-PT6H3M", Timedelta(hours=-6, minutes=-3)), - ("-PT-6H+3M", Timedelta(hours=6, minutes=-3)), - ], -) -def test_iso_constructor(fmt, exp): - assert Timedelta(fmt) == exp - - -@pytest.mark.parametrize( - "constructed_td, conversion", - [ - (Timedelta(nanoseconds=100), "100ns"), - ( - Timedelta( - days=1, - hours=1, - minutes=1, - weeks=1, - seconds=1, - milliseconds=1, - microseconds=1, - nanoseconds=1, - ), - 694861001001001, - ), - (Timedelta(microseconds=1) + Timedelta(nanoseconds=1), "1us1ns"), - (Timedelta(microseconds=1) - Timedelta(nanoseconds=1), "999ns"), - (Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2), "990ns"), - ], -) -def test_td_constructor_on_nanoseconds(constructed_td, conversion): - # GH#9273 - assert constructed_td == Timedelta(conversion) - - -@pytest.mark.parametrize( - ("args", "kwargs"), - [ - ((), {}), - (("ps",), {}), - (("ns",), {}), - (("ms",), {}), - ((), {"seconds": 3}), - (("ns",), {"minutes": 2}), - ], -) -def test_other_args_ignored_if_timedelta_value_passed(args: tuple, kwargs: dict): - original = Timedelta(1) - new = Timedelta(original, *args, **kwargs) - - assert new == original - if not any((args, kwargs)): - assert new is original - - -@pytest.mark.parametrize( - "value", - ( - None, - np.nan, - NaT, - pytest.param( - NA, - marks=pytest.mark.xfail( - reason="constructor fails", - raises=ValueError, - strict=True, - ), - ), - ), - ids=("None", "np.nan", "pd.NaT", "pd.NA"), -) -def test_returns_nat_for_most_na_values(value): - assert Timedelta(value) is NaT - - -class TestInvalidArgCombosFormats: - def test_raises_if_no_args_passed(self): - msg = re.escape( - "cannot construct a Timedelta without a value/unit or 
descriptive keywords " - "(days,seconds....)" - ) - - with pytest.raises(ValueError, match=msg): - Timedelta() - - @pytest.mark.parametrize("unit", ("years", "months", "day", "ps")) - def test_raises_for_invalid_kwarg(self, unit: str): - msg = re.escape( - "cannot construct a Timedelta from the passed arguments, allowed keywords " - "are ('weeks', 'days', 'hours', 'minutes', 'seconds', 'milliseconds', " - "'microseconds', 'nanoseconds')" - ) - - with pytest.raises(ValueError, match=msg): - Timedelta(**{unit: 1}) # type: ignore[arg-type] - - def test_raises_if_kwarg_has_str_value(self): - msg = "Invalid type . Must be int or float." - - with pytest.raises(TypeError, match=msg): - Timedelta(nanoseconds="1") - - @pytest.mark.parametrize( - ("constructor", "value", "unit", "msg"), - ( - (Timedelta, "10s", "ms", "the value is a str"), - (to_timedelta, "10s", "ms", "the input is/contains a str"), - (to_timedelta, ["1", "2", "3"], "s", "the input contains a str"), - ), - ids=("Timedelta", "to_timedelta-scalar", "to_timedelta-sequence"), - ) - def test_raises_if_both_str_value_and_unit_passed( - self, - constructor, - value, - unit, - msg, - ): - msg = "unit must not be specified if " + msg - - with pytest.raises(ValueError, match=msg): - constructor(value, unit=unit) - - @pytest.mark.parametrize( - "value", - [ - "PPPPPPPPPPPP", - "PDTHMS", - "P0DT999H999M999S", - "P1DT0H0M0.0000000000000S", - "P1DT0H0M0.S", - "P", - "-P", - ], - ) - def test_raises_for_invalid_iso_like_str_value(self, value): - msg = f"Invalid ISO 8601 Duration format - {value}" - - with pytest.raises(ValueError, match=msg): - Timedelta(value) - - def test_raises_if_str_value_contains_no_units(self): - msg = "no units specified" - - with pytest.raises(ValueError, match=msg): - Timedelta("3.1415") - - @pytest.mark.parametrize( - ("value", "msg"), - ( - ("us", "unit abbreviation w/o a number"), - ("seconds", "unit abbreviation w/o a number"), - ("garbage", "unit abbreviation w/o a number"), - # GH39710 
Timedelta input string with only symbols and no digits raises - ("+", "symbols w/o a number"), - ("-", "symbols w/o a number"), - ), - ) - def test_raises_if_str_value_contains_no_numeric_component( - self, - value: str, - msg: str, - ): - with pytest.raises(ValueError, match=msg): - Timedelta(value) - - @pytest.mark.parametrize( - "value", - ( - "--", - # Currently invalid as it has a - on the hh:mm:dd part - # (only allowed on the days) - "-10 days -1 h 1.5m 1s 3us", - "10 days -1 h 1.5m 1s 3us", - ), - ) - def test_raises_for_str_value_with_minus_sign(self, value: str): - msg = "only leading negative signs are allowed" - with pytest.raises(ValueError, match=msg): - Timedelta(value) - - @pytest.mark.parametrize("unit", ["Y", "y", "M"]) - def test_raises_if_ambiguous_units_passed(self, unit: str): - msg = ( - "Units 'M', 'Y', and 'y' are no longer supported, as they do not " - "represent unambiguous timedelta values durations." - ) - - with pytest.raises(ValueError, match=msg): - Timedelta(1, unit) - - -class TestOverflow: - @pytest.mark.parametrize(("unit", "max_val"), TD_MAX_PER_UNIT.items()) - def test_int_plus_units_too_big(self, unit: str, max_val: int, request): - if unit == "w": - mark = pytest.mark.xfail( - reason="does not raise", - raises=pytest.fail.Exception, - strict=True, - ) - request.node.add_marker(mark) - - too_big = max_val + 1 - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(too_big, unit=unit) - - @pytest.mark.parametrize(("unit", "min_val"), skip_ns(TD_MIN_PER_UNIT).items()) - def test_int_plus_units_too_small(self, unit: str, min_val: int, request): - if unit == "w": - mark = pytest.mark.xfail( - reason="does not raise", - raises=pytest.fail.Exception, - strict=True, - ) - request.node.add_marker(mark) - - too_small = min_val - 1 - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - 
- with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(too_small, unit=unit) - - @pytest.mark.parametrize(("kwarg", "max_val"), TD_MAX_PER_KWARG.items()) - def test_kwarg_too_big(self, kwarg: str, max_val: int): - too_big = max_val + 1 - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - assert Timedelta(**{kwarg: too_big}) # type: ignore[arg-type] - - @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) - def test_kwarg_too_small(self, kwarg: str, min_val: int): - too_small = min_val - 1 - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(**{kwarg: too_small}) # type: ignore[arg-type] - - @pytest.mark.parametrize(("kwarg", "max_val"), skip_ns(TD_MAX_PER_KWARG).items()) - def test_from_timedelta_too_big(self, kwarg: str, max_val: int): - too_big = timedelta(**{kwarg: max_val + 1}) - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(too_big) - - @pytest.mark.parametrize(("kwarg", "min_val"), skip_ns(TD_MIN_PER_KWARG).items()) - def test_from_timedelta_too_small(self, kwarg: str, min_val: int): - too_small = timedelta(**{kwarg: min_val - 1}) - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(too_small) - - @pytest.mark.parametrize(("unit", "max_val"), skip_ns(NP_TD64_MAX_PER_UNIT).items()) - def test_from_np_td64_too_big(self, unit: str, max_val: int): - too_big = np.timedelta64(max_val + 1, unit) - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(too_big) - - 
@pytest.mark.parametrize(("unit", "min_val"), skip_ns(NP_TD64_MIN_PER_UNIT).items()) - def test_from_np_td64_too_small(self, unit: str, min_val: int): - too_small = np.timedelta64(min_val - 1, unit) - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(too_small) - - def test_too_small_by_1ns_returns_nat(self): - too_small = Timedelta.min.value - 1 - too_small_np_td = np.timedelta64(too_small) - - assert isinstance(too_small, int) - assert isinstance(too_small_np_td, np.timedelta64) - - assert Timedelta(too_small, "ns") is NaT - assert Timedelta(nanoseconds=too_small) is NaT - assert Timedelta(too_small_np_td) is NaT diff --git a/pandas/tests/scalar/timedelta/test_formats.py b/pandas/tests/scalar/timedelta/test_formats.py deleted file mode 100644 index 753186ee4b738..0000000000000 --- a/pandas/tests/scalar/timedelta/test_formats.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest - -from pandas import Timedelta - - -@pytest.mark.parametrize( - "td, expected_repr", - [ - (Timedelta(10, unit="d"), "Timedelta('10 days 00:00:00')"), - (Timedelta(10, unit="s"), "Timedelta('0 days 00:00:10')"), - (Timedelta(10, unit="ms"), "Timedelta('0 days 00:00:00.010000')"), - (Timedelta(-10, unit="ms"), "Timedelta('-1 days +23:59:59.990000')"), - ], -) -def test_repr(td, expected_repr): - assert repr(td) == expected_repr - - -@pytest.mark.parametrize( - "td, expected_iso", - [ - ( - Timedelta( - days=6, - minutes=50, - seconds=3, - milliseconds=10, - microseconds=10, - nanoseconds=12, - ), - "P6DT0H50M3.010010012S", - ), - (Timedelta(days=4, hours=12, minutes=30, seconds=5), "P4DT12H30M5S"), - (Timedelta(nanoseconds=123), "P0DT0H0M0.000000123S"), - # trim nano - (Timedelta(microseconds=10), "P0DT0H0M0.00001S"), - # trim micro - (Timedelta(milliseconds=1), "P0DT0H0M0.001S"), - # don't strip every 0 - (Timedelta(minutes=1), "P0DT0H1M0S"), - ], -) -def test_isoformat(td, 
expected_iso): - assert td.isoformat() == expected_iso diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index b1652ef6515af..03629cc992d66 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -1,5 +1,17 @@ -""" test the scalar Timedelta """ +""" +Most Timedelta scalar tests; See test_arithmetic for tests of binary operations with a +Timedelta scalar. +""" + +from __future__ import annotations + from datetime import timedelta +from itertools import ( + chain, + product, + zip_longest, +) +import operator import re from hypothesis import ( @@ -11,13 +23,14 @@ from pandas._libs import lib from pandas._libs.tslibs import ( - NaT, OutOfBoundsTimedelta, iNaT, ) import pandas as pd from pandas import ( + NA, + NaT, Timedelta, TimedeltaIndex, offsets, @@ -25,82 +38,607 @@ ) import pandas._testing as tm +TD_UNITS = ( + ("n", "ns", "nano", "nanos", "nanosecond", "nanoseconds"), + ("u", "us", "µs", "micro", "micros", "microsecond", "microseconds"), + ("l", "ms", "milli", "millis", "millisecond", "milliseconds"), + ("s", "sec", "second", "seconds"), + ("m", "t", "min", "minute", "minutes"), + ("h", "hr", "hour", "hours"), + ("d", "day", "days"), + ("w",), +) +TD_UNITS_UNIQUE = tuple(map(operator.itemgetter(0), TD_UNITS)) +TD_KWARGS = ( + "nanoseconds", + "microseconds", + "milliseconds", + "seconds", + "minutes", + "hours", + "days", + "weeks", +) +TD_COMPONENTS = tuple(reversed(TD_KWARGS[:-1])) +TD64_UNITS = ("ns", "us", "ms", "s", "m", "h", "D", "W") + +TD_KWARGS_TD_UNITS = dict(zip(TD_KWARGS, TD_UNITS)) +TD_UNITS_TD_KWARGS = dict( + chain.from_iterable( + zip_longest(units, (kwarg,), fillvalue=kwarg) + for kwarg, units in TD_KWARGS_TD_UNITS.items() + ) +) +TD_KWARGS_TD64_UNITS = dict(zip(TD_KWARGS, TD64_UNITS)) +TD_UNITS_TD64_UNITS = dict( + chain.from_iterable( + zip_longest(td_units, (TD64_UNITS[i],), fillvalue=TD64_UNITS[i]) + for i, td_units in 
enumerate(TD_UNITS) + ) +) + +TD_MAX_PER_KWARG = { + "nanoseconds": Timedelta.max.value, + "microseconds": Timedelta.max.value // 1_000, + "milliseconds": Timedelta.max.value // 1_000_000, + "seconds": Timedelta.max.value // 1_000_000_000, + "minutes": Timedelta.max.value // (1_000_000_000 * 60), + "hours": Timedelta.max.value // (1_000_000_000 * 60 * 60), + "days": Timedelta.max.value // (1_000_000_000 * 60 * 60 * 24), + "weeks": Timedelta.max.value // (1_000_000_000 * 60 * 60 * 24 * 7), +} +TD_MIN_PER_KWARG = { + "nanoseconds": Timedelta.min.value, + "microseconds": Timedelta.min.value // 1_000 + 1, + "milliseconds": Timedelta.min.value // 1_000_000 + 1, + "seconds": Timedelta.min.value // 1_000_000_000 + 1, + "minutes": Timedelta.min.value // (1_000_000_000 * 60) + 1, + "hours": Timedelta.min.value // (1_000_000_000 * 60 * 60) + 1, + "days": Timedelta.min.value // (1_000_000_000 * 60 * 60 * 24) + 1, + "weeks": Timedelta.min.value // (1_000_000_000 * 60 * 60 * 24 * 7) + 1, +} +# simplified to include only one key corresponding to each unit +TD_MAX_PER_UNIT = dict(zip(TD_UNITS_UNIQUE, TD_MAX_PER_KWARG.values())) +TD_MIN_PER_UNIT = dict(zip(TD_UNITS_UNIQUE, TD_MIN_PER_KWARG.values())) +TD64_MAX_PER_UNIT = dict(zip(TD64_UNITS, TD_MAX_PER_KWARG.values())) +TD64_MIN_PER_UNIT = dict(zip(TD64_UNITS, TD_MIN_PER_KWARG.values())) + +xfail_does_not_raise = pytest.mark.xfail( + reason="does not raise", + raises=pytest.fail.Exception, + strict=True, +) +skip_ns = lambda s: (u for u in s if not u.startswith("n")) + + +@pytest.fixture(name="timedelta_kwarg", params=skip_ns(TD_KWARGS)) +def fixture_timedelta_kwarg(request) -> str: + return request.param + + +@pytest.fixture(name="td_max_per_unit", params=TD_MAX_PER_UNIT) +def fixture_td_max_per_unit(request) -> tuple: + unit = request.param + if request.cls is TestOverflow and unit == "w": + request.applymarker(xfail_does_not_raise) + + return unit, TD_MAX_PER_UNIT[unit] + + +@pytest.fixture(name="td_min_per_unit", 
params=TD_MIN_PER_UNIT) +def fixture_td_min_per_unit(request) -> tuple: + unit = request.param + if request.cls is TestOverflow and unit == "w": + request.applymarker(xfail_does_not_raise) + + return unit, TD_MIN_PER_UNIT[unit] + + +@pytest.fixture(name="td_max_per_kwarg", params=TD_MAX_PER_KWARG) +def fixture_td_max_per_kwarg(request) -> tuple: + kwarg = request.param + return kwarg, TD_MAX_PER_KWARG[kwarg] + + +@pytest.fixture(name="td_min_per_kwarg", params=TD_MIN_PER_KWARG) +def fixture_td_min_per_kwarg(request) -> tuple: + kwarg = request.param + return kwarg, TD_MIN_PER_KWARG[kwarg] + + +@pytest.fixture(name="td64_max_per_unit", params=skip_ns(TD64_MAX_PER_UNIT)) +def fixture_td64_max_per_unit(request) -> tuple: + unit = request.param + return unit, TD64_MAX_PER_UNIT[unit] + + +@pytest.fixture(name="td64_min_per_unit", params=skip_ns(TD64_MIN_PER_UNIT)) +def fixture_td64_min_per_unit(request) -> tuple: + unit = request.param + return unit, TD64_MIN_PER_UNIT[unit] + + +@pytest.fixture(name="td_overflow_msg") +def fixture_td_overflow_msg() -> str: + return re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) + + +@pytest.fixture(name="non_nano_reso", params=(7, 8, 9)) +def fixture_non_nano_reso(request): + """7, 8, 9 correspond to second, millisecond, and microsecond, respectively""" + return request.param + + +@pytest.fixture(name="non_nano_td") +def fixture_non_nano_td(non_nano_reso: int) -> Timedelta: + # microsecond that would be just out of bounds for nano + us = np.int64((TD_MAX_PER_KWARG["days"] + 1) * 86_400 * 1_000_000) + values = { + 9: us, + 8: us // 1000, + 7: us // 1_000_000, + } + + return Timedelta._from_value_and_reso(values[non_nano_reso], non_nano_reso) + + +class TestConstruction: + """ + Tests of the public constructor, Timedelta.__new__(). 
+ """ + + def test_type(self): + td = Timedelta(1) + + assert isinstance(td, Timedelta) + assert isinstance(td, timedelta) + + @pytest.mark.parametrize("td_unit, td64_unit", TD_UNITS_TD64_UNITS.items()) + def test_from_value_and_unit( + self, + td_unit: str, + td64_unit: str, + any_real_numpy_dtype: str, + ): + """GH#8757: test construction with np dtypes""" + expected_ns = np.timedelta64(1, td64_unit).astype("m8[ns]").view("i8") + one = np.dtype(any_real_numpy_dtype).type(1) + td = Timedelta(one, td_unit) + + assert td.value == expected_ns + + @pytest.mark.parametrize("subset", map(slice, range(1, len(TD_UNITS_UNIQUE)))) + def test_from_str(self, subset: slice): + """GH#8190""" + td64s = tuple(np.timedelta64(1, u) for u in TD64_UNITS[subset]) + str_value = " ".join(tuple(f"1 {u}" for u in TD_UNITS_UNIQUE[subset])) + expected_ns = np.sum(td64s).astype("m8[ns]").view("i8") + td = Timedelta(str_value) + neg_td = Timedelta("-" + str_value) + + assert td.value == expected_ns + assert neg_td.value == -1 * expected_ns + + @pytest.mark.parametrize( + "value, expected_hours", + ( + ("0:00:00", 0), + ("1:00:00", 1), + ), + ) + def test_from_str_with_without_leading_zero(self, value: str, expected_hours: int): + """GH#9570""" + expected_ns = np.timedelta64(expected_hours, "h").astype("m8[ns]").view("i8") + td0 = Timedelta(value) + td1 = Timedelta("0" + value) + + assert td0.value == expected_ns + assert td1.value == expected_ns + + @pytest.mark.parametrize( + ("value", "expected"), + ( + ( + "P6DT0H50M3.010010012S", + Timedelta( + days=6, + minutes=50, + seconds=3, + milliseconds=10, + microseconds=10, + nanoseconds=12, + ), + ), + ( + "P-6DT0H50M3.010010012S", + Timedelta( + days=-6, + minutes=50, + seconds=3, + milliseconds=10, + microseconds=10, + nanoseconds=12, + ), + ), + ("P4DT12H30M5S", Timedelta(days=4, hours=12, minutes=30, seconds=5)), + ("P0DT0H0M0.000000123S", Timedelta(nanoseconds=123)), + ("P0DT0H0M0.00001S", Timedelta(microseconds=10)), + ("P0DT0H0M0.001S", 
Timedelta(milliseconds=1)), + ("P0DT0H1M0S", Timedelta(minutes=1)), + ("P1DT25H61M61S", Timedelta(days=1, hours=25, minutes=61, seconds=61)), + ("PT1S", Timedelta(seconds=1)), + ("PT0S", Timedelta(seconds=0)), + ("P1WT0S", Timedelta(days=7, seconds=0)), + ("P1D", Timedelta(days=1)), + ("P1DT1H", Timedelta(days=1, hours=1)), + ("P1W", Timedelta(days=7)), + ("PT300S", Timedelta(seconds=300)), + ("P1DT0H0M00000000000S", Timedelta(days=1)), + ("PT-6H3M", Timedelta(hours=-6, minutes=3)), + ("-PT6H3M", Timedelta(hours=-6, minutes=-3)), + ("-PT-6H+3M", Timedelta(hours=6, minutes=-3)), + ), + ) + def test_from_isoformat_str(self, value: str, expected: Timedelta): + assert Timedelta(value) == expected + + @pytest.mark.parametrize("subset", map(slice, range(1, len(TD_KWARGS)))) + def test_from_kwargs(self, subset: slice, any_real_numpy_dtype: str): + td64s = tuple(np.timedelta64(1, u) for u in TD64_UNITS[subset]) + kwargs = {u: np.dtype(any_real_numpy_dtype).type(1) for u in TD_KWARGS[subset]} + expected_ns = np.sum(td64s).astype("m8[ns]").view("i8") + td = Timedelta(**kwargs) + + assert td.value == expected_ns + + @pytest.mark.parametrize("td_unit, td_kwarg", TD_UNITS_TD_KWARGS.items()) + def test_kwarg_unit_equivalence(self, request, td_unit: str, td_kwarg: str): + if td_kwarg == "weeks": + request.node.add_marker( + pytest.mark.xfail( + reason="this one isn't valid", + raises=ValueError, + strict=True, + ) + ) + + from_unit = Timedelta(1, td_unit) + from_kwarg = Timedelta(**{td_kwarg: 1}) # type: ignore[arg-type] + from_str_unit = Timedelta(f"1 {td_unit}") + from_str_kwarg = Timedelta(f"1 {td_kwarg}") + + assert from_unit == from_kwarg == from_str_unit == from_str_kwarg + + @pytest.mark.parametrize( + "value, td_unit, expected_ns", + ( + (9.123, "us", 9123), + (9.123456, "ms", 9123456), + (9.123456789, "s", 9123456789), + ), + ) + def test_float_values_not_rounded( + self, + value: float, + td_unit: str, + expected_ns: int, + ): + """GH#12690""" + td_kwarg = 
TD_UNITS_TD_KWARGS[td_unit] + from_float = Timedelta(value, td_unit) + from_str = Timedelta(f"{value} {td_unit}") + from_kwarg = Timedelta(**{td_kwarg: value}) # type: ignore[arg-type] + + assert from_float.value == expected_ns + assert from_str.value == expected_ns + assert from_kwarg.value == expected_ns + + def test_from_offset(self, tick_classes): + offset = tick_classes(1) + assert Timedelta(offset).value == offset.nanos + + @pytest.mark.parametrize("td_unit", TD_UNITS) + def test_from_td64_ignores_unit(self, td_unit: str, td_overflow_msg: str): + """ + Ignore the unit, as it may cause silent overflows leading to incorrect + results, and in non-overflow cases is irrelevant (GH#46827). + """ + td64 = np.timedelta64(TD64_MAX_PER_UNIT["h"], "h") + + assert Timedelta(td64, td_unit) == Timedelta(td64) + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(td64 * 2, td_unit) + + @pytest.mark.parametrize( + ("args", "kwargs"), + [ + ((), {}), + (("ps",), {}), + (("ns",), {}), + (("ms",), {}), + ((), {"seconds": 3}), + (("ns",), {"minutes": 2}), + ], + ) + def test_from_td_ignores_other_args(self, args: tuple, kwargs: dict): + original = Timedelta(1) + new = Timedelta(original, *args, **kwargs) + + assert new == original + if not any((args, kwargs)): + assert new is original + + def test_from_timedelta(self, timedelta_kwarg: str): + kwargs = {timedelta_kwarg: 1} + assert Timedelta(**kwargs) == timedelta(**kwargs) # type: ignore[arg-type] + + @pytest.mark.parametrize( + "value", + ( + None, + np.nan, + NaT, + pytest.param( + NA, + marks=pytest.mark.xfail( + reason="constructor fails", + raises=ValueError, + strict=True, + ), + ), + ), + ids=("None", "np.nan", "pd.NaT", "pd.NA"), + ) + def test_from_na_value_returns_nat(self, value): + assert Timedelta(value) is NaT + + def test_raises_if_no_args_passed(self): + msg = ( + "cannot construct a Timedelta without a value/unit or descriptive keywords" + ) + + with pytest.raises(ValueError, match=msg):
+ Timedelta() + + @pytest.mark.parametrize("unit", ("years", "months", "day", "ps", "reso", "_reso")) + def test_raises_for_invalid_kwarg(self, unit: str): + msg = "cannot construct a Timedelta from the passed arguments" + + with pytest.raises(ValueError, match=msg): + Timedelta(**{unit: 1}) # type: ignore[arg-type] + + def test_raises_if_kwarg_has_str_value(self): + msg = "Invalid type <class 'str'>. Must be int or float." + + with pytest.raises(TypeError, match=msg): + Timedelta(nanoseconds="1") + + @pytest.mark.parametrize( + ("constructor", "value", "unit", "msg"), + ( + (Timedelta, "10s", "ms", "the value is a str"), + (to_timedelta, "10s", "ms", "the input is/contains a str"), + (to_timedelta, ["1", "2", "3"], "s", "the input contains a str"), + ), + ids=("Timedelta", "to_timedelta-scalar", "to_timedelta-sequence"), + ) + def test_raises_if_both_str_value_and_unit_passed( + self, + constructor, + value, + unit, + msg, + ): + msg = "unit must not be specified if " + msg + + with pytest.raises(ValueError, match=msg): + constructor(value, unit=unit) + + @pytest.mark.parametrize( + "value", + [ + "PPPPPPPPPPPP", + "PDTHMS", + "P0DT999H999M999S", + "P1DT0H0M0.0000000000000S", + "P1DT0H0M0.S", + "P", + "-P", + ], + ) + def test_raises_for_invalid_isolike_str_value(self, value): + msg = f"Invalid ISO 8601 Duration format - {value}" + + with pytest.raises(ValueError, match=msg): + Timedelta(value) + + def test_raises_if_str_value_contains_no_units(self): + msg = "no units specified" + + with pytest.raises(ValueError, match=msg): + Timedelta("3.1415") + + @pytest.mark.parametrize( + ("value", "msg"), + ( + ("us", "unit abbreviation w/o a number"), + ("seconds", "unit abbreviation w/o a number"), + ("garbage", "unit abbreviation w/o a number"), + # GH39710 Timedelta input string with only symbols and no digits raises + ("+", "symbols w/o a number"), + ("-", "symbols w/o a number"), + ), + ) + def test_raises_if_str_value_has_no_numeric_component(self, value: str, msg: str): + with
pytest.raises(ValueError, match=msg): + Timedelta(value) + + @pytest.mark.parametrize( + "value", + ( + "--", + # Currently invalid as it has a - on the hh:mm:dd part + # (only allowed on the days) + "-10 days -1 h 1.5m 1s 3us", + "10 days -1 h 1.5m 1s 3us", + ), + ) + def test_raises_for_str_value_with_second_minus_sign(self, value: str): + msg = "only leading negative signs are allowed" + with pytest.raises(ValueError, match=msg): + Timedelta(value) + + @pytest.mark.parametrize( + ("unit", "func"), + product(("Y", "y", "M"), (Timedelta, to_timedelta)), + ) + def test_warns_or_raises_if_ambiguous_unit_passed(self, unit: str, func): + msg = "Units 'M', 'Y', and 'y' are no longer supported" + + with pytest.raises(ValueError, match=msg): + func(1, unit) + + def test_reso_invariant_if_td_created_via_public_api(self, td_max_per_unit: tuple): + unit, max_value = td_max_per_unit + td_small = Timedelta(1, unit) + td_max = Timedelta(max_value, unit) + msg = "attribute '_reso' of 'pandas._libs.tslibs.timedeltas._Timedelta'" + + assert getattr(td_small, "_reso") == 10 + assert getattr(td_max, "_reso") == 10 + with pytest.raises(AttributeError, match=msg): + setattr(td_max, "_reso", 9) + + def test_reso_configurable_via_private_api(self, non_nano_reso: int): + td = Timedelta._from_value_and_reso(np.int64(1), non_nano_reso) + assert td.value == 1 + assert getattr(td, "_reso") == non_nano_reso + + +class TestOverflow: + def test_value_unit_too_big(self, td_max_per_unit: tuple, td_overflow_msg: str): + unit, value = td_max_per_unit + + assert Timedelta(value, unit) <= Timedelta.max + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(value + 1, unit) + + def test_value_unit_too_small(self, td_min_per_unit: tuple, td_overflow_msg: str): + unit, value = td_min_per_unit + too_small = value - 1 + + assert Timedelta(value, unit) >= Timedelta.min + if unit == "n": + result = Timedelta(too_small, unit) + assert result is NaT # type: ignore[comparison-overlap] 
+ too_small -= 1 + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(too_small, unit) + + def test_kwarg_too_big(self, td_max_per_kwarg: tuple, td_overflow_msg: str): + kwarg, value = td_max_per_kwarg + + assert Timedelta(**{kwarg: value}) <= Timedelta.max + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(**{kwarg: value + 1}) + + def test_kwarg_too_small(self, td_min_per_kwarg: tuple, td_overflow_msg: str): + kwarg, value = td_min_per_kwarg + too_small = value - 1 + + assert Timedelta(**{kwarg: value}) >= Timedelta.min + if kwarg == "nanoseconds": + result = Timedelta(**{kwarg: too_small}) + assert result is NaT # type: ignore[comparison-overlap] + too_small -= 1 + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(**{kwarg: too_small}) + + def test_from_timedelta_too_big(self, timedelta_kwarg: str, td_overflow_msg: str): + max_val = TD_MAX_PER_KWARG[timedelta_kwarg] + + assert Timedelta(timedelta(**{timedelta_kwarg: max_val})) <= Timedelta.max + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(timedelta(**{timedelta_kwarg: max_val + 1})) + + def test_from_timedelta_too_small(self, timedelta_kwarg: str, td_overflow_msg: str): + min_val = TD_MIN_PER_KWARG[timedelta_kwarg] + + assert Timedelta(timedelta(**{timedelta_kwarg: min_val})) >= Timedelta.min + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(timedelta(**{timedelta_kwarg: min_val - 1})) + + def test_from_td64_too_big(self, td64_max_per_unit: tuple, td_overflow_msg: str): + unit, value = td64_max_per_unit + + assert Timedelta(np.timedelta64(value, unit)) <= Timedelta.max + with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(np.timedelta64(value + 1, unit)) + + def test_from_td64_too_small(self, td64_min_per_unit: tuple, td_overflow_msg: str): + unit, value = td64_min_per_unit + + assert Timedelta(np.timedelta64(value, unit)) >= Timedelta.min
+ with pytest.raises(OutOfBoundsTimedelta, match=td_overflow_msg): + Timedelta(np.timedelta64(value - 1, unit)) + class TestNonNano: - @pytest.fixture(params=[7, 8, 9]) - def unit(self, request): - # 7, 8, 9 correspond to second, millisecond, and microsecond, respectively - return request.param - - @pytest.fixture - def val(self, unit): - # microsecond that would be just out of bounds for nano - us = 9223372800000000 - if unit == 9: - value = us - elif unit == 8: - value = us // 1000 - else: - value = us // 1_000_000 - return value - - @pytest.fixture - def td(self, unit, val): - return Timedelta._from_value_and_reso(val, unit) - - def test_from_value_and_reso(self, unit, val): - # Just checking that the fixture is giving us what we asked for - td = Timedelta._from_value_and_reso(val, unit) - assert td.value == val - assert td._reso == unit - assert td.days == 106752 - - def test_unary_non_nano(self, td, unit): - assert abs(td)._reso == unit - assert (-td)._reso == unit - assert (+td)._reso == unit - - def test_sub_preserves_reso(self, td, unit): - res = td - td - expected = Timedelta._from_value_and_reso(0, unit) + """ + WIP. + """ + + def test_unary_non_nano(self, non_nano_td, non_nano_reso): + assert abs(non_nano_td)._reso == non_nano_reso + assert (-non_nano_td)._reso == non_nano_reso + assert (+non_nano_td)._reso == non_nano_reso + + def test_sub_preserves_reso(self, non_nano_td, non_nano_reso): + res = non_nano_td - non_nano_td + expected = Timedelta._from_value_and_reso(0, non_nano_reso) assert res == expected - assert res._reso == unit + assert res._reso == non_nano_reso - def test_mul_preserves_reso(self, td, unit): - # The td fixture should always be far from the implementation + def test_mul_preserves_reso(self, non_nano_td, non_nano_reso): + # The non_nano_td fixture should always be far from the implementation # bound, so doubling does not risk overflow. 
- res = td * 2 - assert res.value == td.value * 2 - assert res._reso == unit + res = non_nano_td * 2 + assert res.value == non_nano_td.value * 2 + assert res._reso == non_nano_reso - def test_cmp_cross_reso(self, td): + def test_cmp_cross_reso(self, non_nano_td): # numpy gets this wrong because of silent overflow - other = Timedelta(days=106751, unit="ns") - assert other < td - assert td > other - assert not other == td - assert td != other - - def test_to_pytimedelta(self, td): - res = td.to_pytimedelta() + assert Timedelta.max < non_nano_td + assert non_nano_td > Timedelta.max + assert not Timedelta.max == non_nano_td + assert non_nano_td != Timedelta.max + + def test_to_pytimedelta(self, non_nano_td): + res = non_nano_td.to_pytimedelta() expected = timedelta(days=106752) assert type(res) is timedelta assert res == expected - def test_to_timedelta64(self, td, unit): - for res in [td.to_timedelta64(), td.to_numpy(), td.asm8]: + @pytest.mark.parametrize( + "converter", + ( + operator.methodcaller("to_timedelta64"), + operator.methodcaller("to_numpy"), + operator.attrgetter("asm8"), + ), + ) + def test_to_timedelta64(self, non_nano_td, converter): + td64 = converter(non_nano_td) + reso_dtype = {7: "m8[s]", 8: "m8[ms]", 9: "m8[us]"} - assert isinstance(res, np.timedelta64) - assert res.view("i8") == td.value - if unit == 7: - assert res.dtype == "m8[s]" - elif unit == 8: - assert res.dtype == "m8[ms]" - elif unit == 9: - assert res.dtype == "m8[us]" + assert isinstance(td64, np.timedelta64) + assert td64.view("i8") == non_nano_td.value + assert td64.dtype == reso_dtype[non_nano_td._reso] -class TestTimedeltaUnaryOps: +class TestUnaryOps: def test_invert(self): td = Timedelta(10, unit="d") @@ -130,294 +668,144 @@ def test_unary_ops(self): assert abs(-td) == Timedelta("10d") -class TestTimedeltas: - @pytest.mark.parametrize( - "unit, value, expected", - [ - ("us", 9.999, 9999), - ("ms", 9.999999, 9999999), - ("s", 9.999999999, 9999999999), - ], - ) - def 
test_rounding_on_int_unit_construction(self, unit, value, expected): - # GH 12690 - result = Timedelta(value, unit=unit) - assert result.value == expected - result = Timedelta(str(value) + unit) - assert result.value == expected - - def test_total_seconds_scalar(self): - # see gh-10939 - rng = Timedelta("1 days, 10:11:12.100123456") - expt = 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9 - tm.assert_almost_equal(rng.total_seconds(), expt) +class TestAttributes: + def test_min_max_correspond_to_int64_boundaries(self): + """GH#12727""" + assert Timedelta.min.value == iNaT + 1 + assert Timedelta.max.value == lib.i8max - rng = Timedelta(np.nan) - assert np.isnan(rng.total_seconds()) + def test_fields(self): + """GH#10050: compat with datetime.timedelta; GH#31354""" + fields = ("days", "seconds", "microseconds", "nanoseconds") + td = Timedelta("1 days, 10:11:12") - def test_conversion(self): + assert td.days == 1 + assert td.seconds == 10 * 3600 + 11 * 60 + 12 + assert td.microseconds == 0 + assert td.nanoseconds == 0 + assert all(isinstance(v, int) for v in operator.attrgetter(*fields)(td)) + assert td.days * 24 * 3600 * int(1e9) + td.seconds * int(1e9) == td.value - for td in [Timedelta(10, unit="d"), Timedelta("1 days, 10:11:12.012345")]: - pydt = td.to_pytimedelta() - assert td == Timedelta(pydt) - assert td == pydt - assert isinstance(pydt, timedelta) and not isinstance(pydt, Timedelta) + @pytest.mark.parametrize("field", ("hours", "minutes", "milliseconds")) + def test_fields_not_exposed(self, field: str): + msg = f"'Timedelta' object has no attribute '{field}'" - assert td == np.timedelta64(td.value, "ns") - td64 = td.to_timedelta64() + with pytest.raises(AttributeError, match=msg): + getattr(Timedelta.max, field) - assert td64 == np.timedelta64(td.value, "ns") - assert td == td64 + @pytest.mark.parametrize( + "td, expected_values", + ( + (Timedelta("-1 us"), (-1, 23, 59, 59, 999, 999, 0)), + (Timedelta("-1 days 1 us"), (-2, 23, 59, 59, 999, 999, 0)), + 
), + ) + def test_components(self, td, expected_values: tuple[int]): + values = operator.attrgetter(*TD_COMPONENTS)(td.components) - assert isinstance(td64, np.timedelta64) + assert values == expected_values + assert all(isinstance(v, int) for v in values) - # this is NOT equal and cannot be roundtripped (because of the nanos) - td = Timedelta("1 days, 10:11:12.012345678") - assert td != td.to_pytimedelta() + def test_resolution_string(self): + assert Timedelta(days=1).resolution_string == "D" + assert Timedelta(hours=1).resolution_string == "H" + assert Timedelta(minutes=1).resolution_string == "T" + assert Timedelta(seconds=1).resolution_string == "S" + assert Timedelta(milliseconds=1).resolution_string == "L" + assert Timedelta(microseconds=1).resolution_string == "U" + assert Timedelta(nanoseconds=1).resolution_string == "N" - def test_fields(self): - def check(value): - # that we are int - assert isinstance(value, int) - - # compat to datetime.timedelta - rng = to_timedelta("1 days, 10:11:12") - assert rng.days == 1 - assert rng.seconds == 10 * 3600 + 11 * 60 + 12 - assert rng.microseconds == 0 - assert rng.nanoseconds == 0 - - msg = "'Timedelta' object has no attribute '{}'" - with pytest.raises(AttributeError, match=msg.format("hours")): - rng.hours - with pytest.raises(AttributeError, match=msg.format("minutes")): - rng.minutes - with pytest.raises(AttributeError, match=msg.format("milliseconds")): - rng.milliseconds - - # GH 10050 - check(rng.days) - check(rng.seconds) - check(rng.microseconds) - check(rng.nanoseconds) - - td = Timedelta("-1 days, 10:11:12") - assert abs(td) == Timedelta("13:48:48") - assert str(td) == "-1 days +10:11:12" - assert -td == Timedelta("0 days 13:48:48") - assert -Timedelta("-1 days, 10:11:12").value == 49728000000000 - assert Timedelta("-1 days, 10:11:12").value == -49728000000000 - - rng = to_timedelta("-1 days, 10:11:12.100123456") - assert rng.days == -1 - assert rng.seconds == 10 * 3600 + 11 * 60 + 12 - assert 
rng.microseconds == 100 * 1000 + 123 - assert rng.nanoseconds == 456 - msg = "'Timedelta' object has no attribute '{}'" - with pytest.raises(AttributeError, match=msg.format("hours")): - rng.hours - with pytest.raises(AttributeError, match=msg.format("minutes")): - rng.minutes - with pytest.raises(AttributeError, match=msg.format("milliseconds")): - rng.milliseconds - - # components - tup = to_timedelta(-1, "us").components - assert tup.days == -1 - assert tup.hours == 23 - assert tup.minutes == 59 - assert tup.seconds == 59 - assert tup.milliseconds == 999 - assert tup.microseconds == 999 - assert tup.nanoseconds == 0 - - # GH 10050 - check(tup.days) - check(tup.hours) - check(tup.minutes) - check(tup.seconds) - check(tup.milliseconds) - check(tup.microseconds) - check(tup.nanoseconds) - - tup = Timedelta("-1 days 1 us").components - assert tup.days == -2 - assert tup.hours == 23 - assert tup.minutes == 59 - assert tup.seconds == 59 - assert tup.milliseconds == 999 - assert tup.microseconds == 999 - assert tup.nanoseconds == 0 + @pytest.mark.parametrize("td_units", TD_UNITS) + def test_resolution_is_class_attr(self, td_units: str): + """GH#21344; mirrors datetime.timedelta""" + td = Timedelta(1, td_units[0]) - def test_iso_conversion(self): - # GH #21877 - expected = Timedelta(1, unit="s") - assert to_timedelta("P0DT0H0M1S") == expected + assert td.resolution is Timedelta.resolution + assert Timedelta.resolution == Timedelta(1, "ns") - def test_nat_converters(self): - result = to_timedelta("nat").to_numpy() - assert result.dtype.kind == "M" - assert result.astype("int64") == iNaT + def test_asm8_is_alias_for_to_timedelta64(self): + result = Timedelta.max.asm8 - result = to_timedelta("nan").to_numpy() - assert result.dtype.kind == "M" - assert result.astype("int64") == iNaT + assert result == Timedelta.max.to_timedelta64() + assert isinstance(result, np.timedelta64) @pytest.mark.parametrize( - "unit, np_unit", - [(value, "W") for value in ["W", "w"]] - + [(value,
"D") for value in ["D", "d", "days", "day", "Days", "Day"]] - + [ - (value, "m") - for value in [ - "m", - "minute", - "min", - "minutes", - "t", - "Minute", - "Min", - "Minutes", - "T", - ] - ] - + [ - (value, "s") - for value in [ - "s", - "seconds", - "sec", - "second", - "S", - "Seconds", - "Sec", - "Second", - ] - ] - + [ - (value, "ms") - for value in [ - "ms", - "milliseconds", - "millisecond", - "milli", - "millis", - "l", - "MS", - "Milliseconds", - "Millisecond", - "Milli", - "Millis", - "L", - ] - ] - + [ - (value, "us") - for value in [ - "us", - "microseconds", - "microsecond", - "micro", - "micros", - "u", - "US", - "Microseconds", - "Microsecond", - "Micro", - "Micros", - "U", - ] - ] - + [ - (value, "ns") - for value in [ - "ns", - "nanoseconds", - "nanosecond", - "nano", - "nanos", - "n", - "NS", - "Nanoseconds", - "Nanosecond", - "Nano", - "Nanos", - "N", - ] - ], + "attr, expected_value", + (("delta", 1), ("freq", None), ("is_populated", False)), ) - @pytest.mark.parametrize("wrapper", [np.array, list, pd.Index]) - def test_unit_parser(self, unit, np_unit, wrapper): - # validate all units, GH 6855, GH 21762 - # array-likes - expected = TimedeltaIndex( - [np.timedelta64(i, np_unit) for i in np.arange(5).tolist()] - ) - result = to_timedelta(wrapper(range(5)), unit=unit) - tm.assert_index_equal(result, expected) - result = TimedeltaIndex(wrapper(range(5)), unit=unit) - tm.assert_index_equal(result, expected) - - str_repr = [f"{x}{unit}" for x in np.arange(5)] - result = to_timedelta(wrapper(str_repr)) - tm.assert_index_equal(result, expected) - result = to_timedelta(wrapper(str_repr)) - tm.assert_index_equal(result, expected) + def test_deprecated_attrs(self, attr: str, expected_value): + """GH#46430, GH#46476""" + td = Timedelta(1, "ns") + msg = f"Timedelta.{attr}" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert getattr(td, attr) == expected_value - # scalar - expected = Timedelta(np.timedelta64(2,
np_unit).astype("timedelta64[ns]")) - result = to_timedelta(2, unit=unit) - assert result == expected - result = Timedelta(2, unit=unit) - assert result == expected + with pytest.raises(AttributeError, match="is not writable"): + setattr(td, attr, "coconut") - result = to_timedelta(f"2{unit}") - assert result == expected - result = Timedelta(f"2{unit}") - assert result == expected - @pytest.mark.parametrize("unit", ["Y", "y", "M"]) - def test_unit_m_y_raises(self, unit): - msg = "Units 'M', 'Y', and 'y' are no longer supported" - with pytest.raises(ValueError, match=msg): - Timedelta(10, unit) +class TestMethods: + @pytest.mark.parametrize( + "value, expected", + ( + ( + "1 days, 10:11:12.123456789", + 1 * 86400 + 10 * 3600 + 11 * 60 + 12.123456, + ), + ("30S", 30.0), + ("0", 0.0), + ("-2S", -2.0), + ("5.324S", 5.324), + ), + ) + def test_total_seconds(self, value: str, expected: float): + # see gh-10939 + td = Timedelta(value) + assert td.total_seconds() == expected - with pytest.raises(ValueError, match=msg): - to_timedelta(10, unit) + def test_to_pytimedelta(self): + td = Timedelta("1 days, 10:11:12.012345") + py_td = td.to_pytimedelta() - with pytest.raises(ValueError, match=msg): - to_timedelta([1, 2], unit) + assert py_td == td + assert Timedelta(py_td) == td + assert isinstance(py_td, timedelta) + assert not isinstance(py_td, Timedelta) - def test_numeric_conversions(self): - assert Timedelta(0) == np.timedelta64(0, "ns") - assert Timedelta(10) == np.timedelta64(10, "ns") - assert Timedelta(10, unit="ns") == np.timedelta64(10, "ns") + @pytest.mark.parametrize( + "td, expected", + ( + (Timedelta(500, "ns"), timedelta(0)), + (Timedelta(501, "ns"), timedelta(microseconds=1)), + ), + ) + def test_to_pytimedelta_rounds_ns(self, td: Timedelta, expected: timedelta): + assert td.to_pytimedelta() == expected - assert Timedelta(10, unit="us") == np.timedelta64(10, "us") - assert Timedelta(10, unit="ms") == np.timedelta64(10, "ms") - assert Timedelta(10, unit="s") == 
np.timedelta64(10, "s") - assert Timedelta(10, unit="d") == np.timedelta64(10, "D") + def test_to_timedelta64(self): + td64 = Timedelta.max.to_timedelta64() - def test_timedelta_conversions(self): - assert Timedelta(timedelta(seconds=1)) == np.timedelta64(1, "s").astype( - "m8[ns]" - ) - assert Timedelta(timedelta(microseconds=1)) == np.timedelta64(1, "us").astype( - "m8[ns]" - ) - assert Timedelta(timedelta(days=1)) == np.timedelta64(1, "D").astype("m8[ns]") + assert td64 == Timedelta.max + assert Timedelta(td64) == Timedelta.max + assert isinstance(td64, np.timedelta64) - def test_to_numpy_alias(self): - # GH 24653: alias .to_numpy() for scalars - td = Timedelta("10m7s") - assert td.to_timedelta64() == td.to_numpy() + def test_to_numpy(self): + """GH#24653: alias .to_numpy() for scalars""" + assert Timedelta.max.to_timedelta64() == Timedelta.max.to_numpy() + @pytest.mark.parametrize( + "args, kwargs", + ( + (("m8[ns]",), {}), + ((), {"copy": True}), + (("m8[ns]",), {"copy": True}), + ), + ) + def test_to_numpy_raises_if_args_passed(self, args, kwargs): # GH#44460 msg = "dtype and copy arguments are ignored" with pytest.raises(ValueError, match=msg): - td.to_numpy("m8[s]") - with pytest.raises(ValueError, match=msg): - td.to_numpy(copy=True) + Timedelta.max.to_numpy(*args, **kwargs) @pytest.mark.parametrize( "freq,s1,s2", @@ -529,240 +917,162 @@ def test_round_sanity(self, val, method): assert np.abs((res - td).value) < nanos assert res.value % nanos == 0 - def test_contains(self): - # Checking for any NaT-like objects - # GH 13603 - td = to_timedelta(range(5), unit="d") + offsets.Hour(1) - for v in [NaT, None, float("nan"), np.nan]: - assert not (v in td) - - td = to_timedelta([NaT]) - for v in [NaT, None, float("nan"), np.nan]: - assert v in td - - def test_identity(self): - - td = Timedelta(10, unit="d") - assert isinstance(td, Timedelta) - assert isinstance(td, timedelta) - - def test_short_format_converters(self): - def conv(v): - return v.astype("m8[ns]") - 
- assert Timedelta("10") == np.timedelta64(10, "ns") - assert Timedelta("10ns") == np.timedelta64(10, "ns") - assert Timedelta("100") == np.timedelta64(100, "ns") - assert Timedelta("100ns") == np.timedelta64(100, "ns") - - assert Timedelta("1000") == np.timedelta64(1000, "ns") - assert Timedelta("1000ns") == np.timedelta64(1000, "ns") - assert Timedelta("1000NS") == np.timedelta64(1000, "ns") - - assert Timedelta("10us") == np.timedelta64(10000, "ns") - assert Timedelta("100us") == np.timedelta64(100000, "ns") - assert Timedelta("1000us") == np.timedelta64(1000000, "ns") - assert Timedelta("1000Us") == np.timedelta64(1000000, "ns") - assert Timedelta("1000uS") == np.timedelta64(1000000, "ns") - - assert Timedelta("1ms") == np.timedelta64(1000000, "ns") - assert Timedelta("10ms") == np.timedelta64(10000000, "ns") - assert Timedelta("100ms") == np.timedelta64(100000000, "ns") - assert Timedelta("1000ms") == np.timedelta64(1000000000, "ns") - - assert Timedelta("-1s") == -np.timedelta64(1000000000, "ns") - assert Timedelta("1s") == np.timedelta64(1000000000, "ns") - assert Timedelta("10s") == np.timedelta64(10000000000, "ns") - assert Timedelta("100s") == np.timedelta64(100000000000, "ns") - assert Timedelta("1000s") == np.timedelta64(1000000000000, "ns") - - assert Timedelta("1d") == conv(np.timedelta64(1, "D")) - assert Timedelta("-1d") == -conv(np.timedelta64(1, "D")) - assert Timedelta("1D") == conv(np.timedelta64(1, "D")) - assert Timedelta("10D") == conv(np.timedelta64(10, "D")) - assert Timedelta("100D") == conv(np.timedelta64(100, "D")) - assert Timedelta("1000D") == conv(np.timedelta64(1000, "D")) - assert Timedelta("10000D") == conv(np.timedelta64(10000, "D")) - - # space - assert Timedelta(" 10000D ") == conv(np.timedelta64(10000, "D")) - assert Timedelta(" - 10000D ") == -conv(np.timedelta64(10000, "D")) - - # invalid - msg = "invalid unit abbreviation" - with pytest.raises(ValueError, match=msg): - Timedelta("1foo") - msg = "unit abbreviation w/o a 
number" - with pytest.raises(ValueError, match=msg): - Timedelta("foo") - - def test_full_format_converters(self): - def conv(v): - return v.astype("m8[ns]") - - d1 = np.timedelta64(1, "D") - - assert Timedelta("1days") == conv(d1) - assert Timedelta("1days,") == conv(d1) - assert Timedelta("- 1days,") == -conv(d1) - - assert Timedelta("00:00:01") == conv(np.timedelta64(1, "s")) - assert Timedelta("06:00:01") == conv(np.timedelta64(6 * 3600 + 1, "s")) - assert Timedelta("06:00:01.0") == conv(np.timedelta64(6 * 3600 + 1, "s")) - assert Timedelta("06:00:01.01") == conv( - np.timedelta64(1000 * (6 * 3600 + 1) + 10, "ms") - ) - - assert Timedelta("- 1days, 00:00:01") == conv(-d1 + np.timedelta64(1, "s")) - assert Timedelta("1days, 06:00:01") == conv( - d1 + np.timedelta64(6 * 3600 + 1, "s") - ) - assert Timedelta("1days, 06:00:01.01") == conv( - d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, "ms") - ) - - # invalid - msg = "have leftover units" - with pytest.raises(ValueError, match=msg): - Timedelta("- 1days, 00") - def test_pickle(self): + assert Timedelta.max == tm.round_trip_pickle(Timedelta.max) - v = Timedelta("1 days 10:11:12.0123456") - v_p = tm.round_trip_pickle(v) - assert v == v_p + @pytest.mark.parametrize("num_days", range(20)) + def test_hash_equals_timedelta_hash(self, num_days: int): + """GH#11129""" + kwargs = {"days": num_days, "seconds": 1} + td = Timedelta(**kwargs) # type: ignore[arg-type] - def test_timedelta_hash_equality(self): - # GH 11129 - v = Timedelta(1, "D") - td = timedelta(days=1) - assert hash(v) == hash(td) + assert hash(td) == hash(timedelta(**kwargs)) - d = {td: 2} - assert d[v] == 2 + @pytest.mark.parametrize("ns", (1, 500)) + def test_hash_differs_from_timedelta_hash_if_ns_lost(self, ns: int): + td = Timedelta(ns, "ns") + assert hash(td) != hash(td.to_pytimedelta()) - tds = [Timedelta(seconds=1) + Timedelta(days=n) for n in range(20)] - assert all(hash(td) == hash(td.to_pytimedelta()) for td in tds) + 
@pytest.mark.parametrize("td_kwarg", TD_KWARGS) + def test_only_zero_value_falsy(self, td_kwarg): + """GH#21484""" + assert bool(Timedelta(**{td_kwarg: 0})) is False + assert bool(Timedelta(**{td_kwarg: 1})) is True + assert bool(Timedelta(**{td_kwarg: -1})) is True - # python timedeltas drop ns resolution - ns_td = Timedelta(1, "ns") - assert hash(ns_td) != hash(ns_td.to_pytimedelta()) + @pytest.mark.parametrize( + "td, expected_iso", + [ + ( + Timedelta(days=6, milliseconds=123, nanoseconds=45), + "P6DT0H0M0.123000045S", + ), + (Timedelta(days=4, hours=12, minutes=30, seconds=5), "P4DT12H30M5S"), + (Timedelta(nanoseconds=123), "P0DT0H0M0.000000123S"), + # trim nano + (Timedelta(microseconds=10), "P0DT0H0M0.00001S"), + # trim micro + (Timedelta(milliseconds=1), "P0DT0H0M0.001S"), + # don't strip every 0 + (Timedelta(minutes=1), "P0DT0H1M0S"), + ], + ) + def test_isoformat(self, td, expected_iso): + assert td.isoformat() == expected_iso - def test_implementation_limits(self): - min_td = Timedelta(Timedelta.min) - max_td = Timedelta(Timedelta.max) + @pytest.mark.parametrize( + ("value, expected"), + ( + ("1 W", "7 days 00:00:00"), + ("-1 W", "-7 days +00:00:00"), + ("1 D", "1 days 00:00:00"), + ("-1 D", "-1 days +00:00:00"), + ("1 H", "0 days 01:00:00"), + ("-1 H", "-1 days +23:00:00"), + ("1 m", "0 days 00:01:00"), + ("-1 m", "-1 days +23:59:00"), + ("1 m", "0 days 00:01:00"), + ("-1 m", "-1 days +23:59:00"), + ("1 s", "0 days 00:00:01"), + ("-1 s", "-1 days +23:59:59"), + ("1 ms", "0 days 00:00:00.001000"), + ("-1 ms", "-1 days +23:59:59.999000"), + ("1 us", "0 days 00:00:00.000001"), + ("-1 us", "-1 days +23:59:59.999999"), + ("1 ns", "0 days 00:00:00.000000001"), + ("-1 ns", "-1 days +23:59:59.999999999"), + ), + ) + def test_str_and_repr(self, value: str, expected: str): + expected_repr = f"Timedelta('{expected}')" + td = Timedelta(value) - # GH 12727 - # timedelta limits correspond to int64 boundaries - assert min_td.value == iNaT + 1 - assert max_td.value == 
lib.i8max + assert str(td) == expected + assert repr(td) == expected_repr + assert Timedelta(expected) == td - # Beyond lower limit, a NAT before the Overflow - assert (min_td - Timedelta(1, "ns")) is NaT - msg = re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - with pytest.raises(OutOfBoundsTimedelta, match=msg): - min_td - Timedelta(2, "ns") +class TestToTimedelta: + """Move elsewhere""" - with pytest.raises(OutOfBoundsTimedelta, match=msg): - max_td + Timedelta(1, "ns") + def test_iso_conversion(self): + # GH #21877 + expected = Timedelta(1, unit="s") + assert to_timedelta("P0DT0H0M1S") == expected - # Same tests using the internal nanosecond values - td = Timedelta(min_td.value - 1, "ns") - assert td is NaT + def test_nat_converters(self): + result = to_timedelta("nat").to_numpy() + assert result.dtype.kind == "M" + assert result.astype("int64") == iNaT - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(min_td.value - 2, "ns") + result = to_timedelta("nan").to_numpy() + assert result.dtype.kind == "M" + assert result.astype("int64") == iNaT - with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(max_td.value + 1, "ns") + def test_contains(self): + # Checking for any NaT-like objects + # GH 13603 + td = to_timedelta(range(5), unit="d") + offsets.Hour(1) + for v in [NaT, None, float("nan"), np.nan]: + assert not (v in td) - def test_total_seconds_precision(self): - # GH 19458 - assert Timedelta("30S").total_seconds() == 30.0 - assert Timedelta("0").total_seconds() == 0.0 - assert Timedelta("-2S").total_seconds() == -2.0 - assert Timedelta("5.324S").total_seconds() == 5.324 - assert (Timedelta("30S").total_seconds() - 30.0) < 1e-20 - assert (30.0 - Timedelta("30S").total_seconds()) < 1e-20 + td = to_timedelta([NaT]) + for v in [NaT, None, float("nan"), np.nan]: + assert v in td - def test_resolution_string(self): - assert Timedelta(days=1).resolution_string == "D" - assert Timedelta(days=1, 
hours=6).resolution_string == "H" - assert Timedelta(days=1, minutes=6).resolution_string == "T" - assert Timedelta(days=1, seconds=6).resolution_string == "S" - assert Timedelta(days=1, milliseconds=6).resolution_string == "L" - assert Timedelta(days=1, microseconds=6).resolution_string == "U" - assert Timedelta(days=1, nanoseconds=6).resolution_string == "N" + # invalid + msg = "have leftover units" + with pytest.raises(ValueError, match=msg): + Timedelta("- 1days, 00") - def test_resolution_deprecated(self): - # GH#21344 - td = Timedelta(days=4, hours=3) - result = td.resolution - assert result == Timedelta(nanoseconds=1) + @pytest.mark.parametrize("unit, np_unit", TD_UNITS_TD64_UNITS.items()) + @pytest.mark.parametrize("wrapper", [np.array, list, pd.Index]) + def test_unit_parser(self, unit, np_unit, wrapper): + # validate all units, GH 6855, GH 21762 + # array-likes + expected = TimedeltaIndex( + [np.timedelta64(i, np_unit) for i in np.arange(5).tolist()] + ) + result = to_timedelta(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) - # Check that the attribute is available on the class, mirroring - # the stdlib timedelta behavior - result = Timedelta.resolution - assert result == Timedelta(nanoseconds=1) + str_repr = [f"{x}{unit}" for x in np.arange(5)] + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize( - "value, expected", + "constructed_td, conversion", [ - (Timedelta("10S"), True), - (Timedelta("-10S"), True), - (Timedelta(10, unit="ns"), True), - (Timedelta(0, unit="ns"), False), - (Timedelta(-10, unit="ns"), True), - (Timedelta(None), True), - (NaT, True), + (Timedelta(nanoseconds=100), "100ns"), + ( + Timedelta( + days=1, + hours=1, + minutes=1, + weeks=1, + seconds=1, + milliseconds=1, + 
microseconds=1, + nanoseconds=1, + ), + 694861001001001, + ), + (Timedelta(microseconds=1) + Timedelta(nanoseconds=1), "1us1ns"), + (Timedelta(microseconds=1) - Timedelta(nanoseconds=1), "999ns"), + (Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2), "990ns"), ], ) -def test_truthiness(value, expected): - # https://github.com/pandas-dev/pandas/issues/21484 - assert bool(value) is expected - - -def test_timedelta_attribute_precision(): - # GH 31354 - td = Timedelta(1552211999999999872, unit="ns") - result = td.days * 86400 - result += td.seconds - result *= 1000000 - result += td.microseconds - result *= 1000 - result += td.nanoseconds - expected = td.value - assert result == expected - - -def test_freq_deprecated(): - # GH#46430 - td = Timedelta(123456546, unit="ns") - with tm.assert_produces_warning(FutureWarning, match="Timedelta.freq"): - freq = td.freq - - assert freq is None - - with pytest.raises(AttributeError, match="is not writable"): - td.freq = offsets.Day() - - -def test_is_populated_deprecated(): - # GH#46430 - td = Timedelta(123456546, unit="ns") - with tm.assert_produces_warning(FutureWarning, match="Timedelta.is_populated"): - td.is_populated - - with pytest.raises(AttributeError, match="is not writable"): - td.is_populated = 1 +def test_td_constructor_on_nanoseconds(constructed_td, conversion): + # GH#9273 + assert constructed_td == Timedelta(conversion) -def test_delta_deprecated(): - # GH#46476 - td = Timedelta(123456546, unit="ns") - with tm.assert_produces_warning(FutureWarning, match="Timedelta.delta is"): - td.delta +def test_nan_total_seconds(): + # put elsewhere? 
a test of NaT, not Timedelta, behavior + rng = Timedelta(np.nan) + assert np.isnan(rng.total_seconds()) From c9e209e525e7c30ff44b65f5f2322e00d654039b Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 10 May 2022 13:51:23 -0700 Subject: [PATCH 14/18] consolidate Timedelta creation --- pandas/_libs/tslibs/timedeltas.pyx | 175 +++++++++--------- .../tests/scalar/timedelta/test_timedelta.py | 12 +- 2 files changed, 88 insertions(+), 99 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 16d77203d1503..31cf3d736870e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -32,6 +32,7 @@ import_datetime() cimport pandas._libs.tslibs.util as util from pandas._libs cimport ops +from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( cast_from_unit, @@ -308,21 +309,6 @@ cdef convert_to_timedelta64(object ts, str unit): return ts.astype("timedelta64[ns]") -cpdef to_timedelta64(object value, str unit): - """ - Wrapper around convert_to_timedelta64() that does overflow checks. - TODO: also construct non-nano - TODO: do all overflow-unsafe operations here - TODO: constrain unit to a more specific type - """ - with cython.overflowcheck(True): - try: - return convert_to_timedelta64(value, unit) - except OverflowError as ex: - msg = f"{value} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" - raise OutOfBoundsTimedelta(msg) from ex - - @cython.boundscheck(False) @cython.wraparound(False) def array_to_timedelta64( @@ -682,8 +668,7 @@ cdef bint _validate_ops_compat(other): def _op_unary_method(func, name): def f(self): - new_value = func(self.value) - return _timedelta_from_value_and_reso(new_value, self._reso) + return create_timedelta(func(self.value), "ignore", self._reso) f.__name__ = name return f @@ -700,20 +685,6 @@ cpdef int64_t calc_int_int(object op, object a, object b) except? 
-1: raise OutOfBoundsTimedelta(msg) from ex -cpdef int64_t calc_int_float(object op, object a, object b) except? -1: - """ - Calculate op(int, double), raising if any of the following aren't safe conversions: - - a to int64_t - - b to double - - result to int64_t - """ - try: - return ops.calc_int_float(op, a, b) - except OverflowError as ex: - msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" - raise OutOfBoundsTimedelta(msg) from ex - - def _binary_op_method_timedeltalike(op, name): # define a binary operation that only works if the other argument is # timedelta like or an array of timedeltalike @@ -758,10 +729,7 @@ def _binary_op_method_timedeltalike(op, name): if self._reso != other._reso: raise NotImplementedError - result = calc_int_int(op, self.value, other.value) - if result == NPY_NAT: - return NaT - return _timedelta_from_value_and_reso(result, self._reso) + return create_timedelta(op(self.value, other.value), "ignore", self._reso) f.__name__ = name return f @@ -892,7 +860,7 @@ cdef _to_py_int_float(v): def _timedelta_unpickle(value, reso): - return _timedelta_from_value_and_reso(value, reso) + return create_timedelta(value, "ignore", reso) cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): @@ -923,6 +891,44 @@ cdef _timedelta_from_value_and_reso(int64_t value, NPY_DATETIMEUNIT reso): return td_base +@cython.overflowcheck(True) +cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_reso): + """ + Timedelta factory. + + Timedelta.__new__ just does arg validation (at least currently). Also, some internal + functions expect to be able to create non-nano reso Timedeltas, but Timedelta.__new__ + doesn't yet expose that. + + _timedelta_from_value_and_reso does, but only accepts limited args, and doesn't check for overflow. 
+ """ + cdef: + int64_t out_value + + if isinstance(value, _Timedelta): + return value + if value is C_NA: + raise ValueError("Not supported") + + try: + # if unit == "ns", no need to create an m8[ns] just to read the (same) value back + # if unit == "ignore", assume caller wants to invoke an overflow-safe version of + # _timedelta_from_value_and_reso, and that any float rounding is acceptable + if (is_integer_object(value) or is_float_object(value)) and in_unit in ("ns", "ignore"): + if util.is_nan(value): + return NaT + out_value = value + else: + out_value = convert_to_timedelta64(value, in_unit).view(np.int64) + except OverflowError as ex: + msg = f"{value} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from ex + + if out_value == NPY_NAT: + return NaT + return _timedelta_from_value_and_reso(out_value, out_reso) + + # Similar to Timestamp/datetime, this is a construction requirement for # timedeltas that we need to do object instantiation in python. This will # serve as a C extension type that shadows the Python class, where we do any @@ -1406,7 +1412,7 @@ cdef class _Timedelta(timedelta): @classmethod def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): # exposing as classmethod for testing - return _timedelta_from_value_and_reso(value, reso) + return create_timedelta(value, "ignore", reso) # Python front end to C extension type _Timedelta @@ -1474,37 +1480,27 @@ class Timedelta(_Timedelta): ) def __new__(cls, object value=_no_input, unit=None, **kwargs): - cdef _Timedelta td_base - - if isinstance(value, _Timedelta): - return value - if checknull_with_nat(value): - return NaT - - if unit in {"Y", "y", "M"}: - raise ValueError( - "Units 'M', 'Y', and 'y' are no longer supported, as they do not " - "represent unambiguous timedelta values durations." 
- ) - if isinstance(value, str) and unit is not None: - raise ValueError("unit must not be specified if the value is a str") - elif value is _no_input and not kwargs: - raise ValueError( - "cannot construct a Timedelta without a value/unit " - "or descriptive keywords (days,seconds....)" - ) - if not kwargs.keys() <= set(cls._allowed_kwargs): - raise ValueError( - "cannot construct a Timedelta from the passed arguments, " - f"allowed keywords are {cls._allowed_kwargs}" - ) + cdef: + _Timedelta td_base + NPY_DATETIMEUNIT out_reso = NPY_FR_ns - # GH43764, convert any input to nanoseconds first, to ensure any potential - # nanosecond contributions from kwargs parsed as floats are included - kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) - if kwargs: - value = to_timedelta64( - sum(( + # process kwargs iff no value passed + if value is _no_input: + if not kwargs: + raise ValueError( + "cannot construct a Timedelta without a value/unit " + "or descriptive keywords (days,seconds....)" + ) + if not kwargs.keys() <= set(cls._allowed_kwargs): + raise ValueError( + "cannot construct a Timedelta from the passed arguments, " + f"allowed keywords are {cls._allowed_kwargs}" + ) + # GH43764, convert any input to nanoseconds first, to ensure any potential + # nanosecond contributions from kwargs parsed as floats are included + kwargs = collections.defaultdict(int, {key: _to_py_int_float(val) for key, val in kwargs.items()}) + ns = sum( + ( kwargs["weeks"] * 7 * 24 * 3600 * 1_000_000_000, kwargs["days"] * 24 * 3600 * 1_000_000_000, kwargs["hours"] * 3600 * 1_000_000_000, @@ -1513,19 +1509,18 @@ class Timedelta(_Timedelta): kwargs["milliseconds"] * 1_000_000, kwargs["microseconds"] * 1_000, kwargs["nanoseconds"], - )), - "ns", + ) ) - else: - if is_integer_object(value) or is_float_object(value): - unit = parse_timedelta_unit(unit) - else: - unit = "ns" - value = to_timedelta64(value, unit) + return create_timedelta(ns, "ns", 
out_reso) - if is_td64nat(value): - return NaT - return _timedelta_from_value_and_reso(value.view("i8"), NPY_FR_ns) + if isinstance(value, str) and unit is not None: + raise ValueError("unit must not be specified if the value is a str") + elif unit in {"Y", "y", "M"}: + raise ValueError( + "Units 'M', 'Y', and 'y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." + ) + return create_timedelta(value, parse_timedelta_unit(unit), out_reso) def __setstate__(self, state): if len(state) == 1: @@ -1602,14 +1597,14 @@ class Timedelta(_Timedelta): # Arithmetic Methods # TODO: Can some of these be defined in the cython class? - __neg__ = _op_unary_method(lambda x: -x, '__neg__') - __pos__ = _op_unary_method(lambda x: x, '__pos__') - __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + __neg__ = _op_unary_method(operator.neg, "__neg__") + __pos__ = _op_unary_method(operator.pos, "__pos__") + __abs__ = _op_unary_method(operator.abs, "__abs__") - __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') - __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') - __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') - __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + __add__ = _binary_op_method_timedeltalike(operator.add, "__add__") + __radd__ = _binary_op_method_timedeltalike(operator.add, "__radd__") + __sub__ = _binary_op_method_timedeltalike(operator.sub, "__sub__") + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, "__rsub__") def __mul__(self, other): if util.is_nan(other): @@ -1618,13 +1613,9 @@ class Timedelta(_Timedelta): if is_array(other): # ndarray-like return other * self.to_timedelta64() - if is_integer_object(other): - value = calc_int_int(operator.mul, self.value, other) - return _timedelta_from_value_and_reso(value, self._reso) - if is_float_object(other): - value = calc_int_float(operator.mul, self.value, other) - 
return _timedelta_from_value_and_reso(value, self._reso) - + if is_integer_object(other) or is_float_object(other): + # can't call Timedelta b/c it doesn't (yet) expose reso + return create_timedelta(self.value * other, "ignore", self._reso) return NotImplemented __rmul__ = __mul__ diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 03629cc992d66..287435b505d1b 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -339,7 +339,7 @@ def test_from_offset(self, tick_classes): offset = tick_classes(1) assert Timedelta(offset).value == offset.nanos - @pytest.mark.parametrize("td_unit", TD_UNITS) + @pytest.mark.parametrize("td_unit", chain.from_iterable(TD_UNITS)) def test_from_td64_ignores_unit(self, td_unit: str, td_overflow_msg: str): """ Ignore the unit, as it may cause silently overflows leading to incorrect @@ -355,7 +355,6 @@ def test_from_td64_ignores_unit(self, td_unit: str, td_overflow_msg: str): ("args", "kwargs"), [ ((), {}), - (("ps",), {}), (("ns",), {}), (("ms",), {}), ((), {"seconds": 3}), @@ -367,8 +366,6 @@ def test_from_td_ignores_other_args(self, args: tuple, kwargs: dict): new = Timedelta(original, *args, **kwargs) assert new == original - if not any((args, kwargs)): - assert new is original def test_from_timedelta(self, timedelta_kwarg: str): kwargs = {timedelta_kwarg: 1} @@ -601,11 +598,12 @@ def test_sub_preserves_reso(self, non_nano_td, non_nano_reso): assert res == expected assert res._reso == non_nano_reso - def test_mul_preserves_reso(self, non_nano_td, non_nano_reso): + @pytest.mark.parametrize("factor", (2, 2.5)) + def test_mul_preserves_reso(self, non_nano_td, non_nano_reso, factor): # The non_nano_td fixture should always be far from the implementation # bound, so doubling does not risk overflow. 
- res = non_nano_td * 2 - assert res.value == non_nano_td.value * 2 + res = non_nano_td * factor + assert res.value == non_nano_td.value * factor assert res._reso == non_nano_reso def test_cmp_cross_reso(self, non_nano_td): From 4a73340a3c126f1ab7d217d7aa43729cc4bd2979 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 10 May 2022 15:25:14 -0700 Subject: [PATCH 15/18] remove unused stuff --- pandas/_libs/ops.pxd | 1 - pandas/_libs/ops.pyi | 1 - pandas/_libs/ops.pyx | 8 -- pandas/_libs/tslibs/timedeltas.pyx | 6 +- pandas/tests/libs/test_ops.py | 133 +++++-------------------- pandas/tests/tslibs/test_timedeltas.py | 28 +++--- 6 files changed, 41 insertions(+), 136 deletions(-) diff --git a/pandas/_libs/ops.pxd b/pandas/_libs/ops.pxd index bf634f9a56e0e..69d6541150345 100644 --- a/pandas/_libs/ops.pxd +++ b/pandas/_libs/ops.pxd @@ -2,4 +2,3 @@ from numpy cimport int64_t cpdef int64_t calc_int_int(object op, int64_t a, int64_t b) except? -1 -cpdef int64_t calc_int_float(object op, int64_t a, double b) except? -1 diff --git a/pandas/_libs/ops.pyi b/pandas/_libs/ops.pyi index 5086ed860329a..eb98e5749ec1b 100644 --- a/pandas/_libs/ops.pyi +++ b/pandas/_libs/ops.pyi @@ -49,4 +49,3 @@ def maybe_convert_bool( convert_to_masked_nullable: Literal[True], ) -> tuple[np.ndarray, np.ndarray]: ... def calc_int_int(op, left: int, right: int) -> int: ... -def calc_int_float(op, left: int, right: float) -> int: ... diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 2abdee7c2b68e..43ebf10c97377 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -318,11 +318,3 @@ cpdef int64_t calc_int_int(object op, int64_t a, int64_t b) except? -1: operand or the result to an int64_t would overflow. """ return op(a, b) - -@cython.overflowcheck(True) -cpdef int64_t calc_int_float(object op, int64_t a, double b) except? -1: - """ - Calculate op(a, b) and return the result. Raises OverflowError if converting either - operand or the result would overflow. 
- """ - return op(a, b) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 31cf3d736870e..046fe29c0ed5f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -250,9 +250,9 @@ cdef object ensure_td64ns(object ts): unitstr = npy_unit_to_abbrev(td64_unit) mult = precision_from_unit(unitstr)[0] - td64_value = calc_int_int(operator.mul, get_timedelta64_value(ts), mult) + ns = calc_int_int(operator.mul, get_timedelta64_value(ts), mult) - return np.timedelta64(td64_value, "ns") + return np.timedelta64(ns, "ns") cdef convert_to_timedelta64(object ts, str unit): @@ -673,7 +673,7 @@ def _op_unary_method(func, name): return f -cpdef int64_t calc_int_int(object op, object a, object b) except? -1: +cdef int64_t calc_int_int(object op, object a, object b) except? -1: """ Calculate op(a, b), raising if either operand or the result cannot be safely cast to an int64_t. diff --git a/pandas/tests/libs/test_ops.py b/pandas/tests/libs/test_ops.py index 4a8a86d5eca15..ebb4526d381ca 100644 --- a/pandas/tests/libs/test_ops.py +++ b/pandas/tests/libs/test_ops.py @@ -1,11 +1,15 @@ import operator -from platform import architecture import numpy as np import pytest from pandas._libs import ops +from pandas.core.ops import ( + radd, + rsub, +) + @pytest.fixture(name="int_max", scope="module") def fixture_int_max() -> int: @@ -17,16 +21,6 @@ def fixture_int_min() -> int: return np.iinfo(np.int64).min -@pytest.fixture(name="float_max", scope="module") -def fixture_float_max() -> np.float64: - return np.finfo(np.float64).max - - -@pytest.fixture(name="float_min", scope="module") -def fixture_float_min() -> np.float64: - return np.finfo(np.float64).min - - @pytest.fixture(name="overflow_msg", scope="module") def fixture_overflow_msg() -> str: return "|".join( @@ -37,116 +31,37 @@ def fixture_overflow_msg() -> str: ) -class TestCalcIntInt: - def test_raises_for_too_large_arg(self, int_max: int, overflow_msg: str): - with 
pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.add, int_max + 1, 1) - - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.add, 1, int_max + 1) - - def test_raises_for_too_small_arg(self, int_min: int, overflow_msg: str): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.add, int_min - 1, 1) +@pytest.fixture(name="add_op", params=(operator.add, radd)) +def fixture_add_op(request): + return request.param - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.add, 1, int_min - 1) - def test_raises_for_too_large_result(self, int_max: int, overflow_msg: str): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.add, int_max, 1) +@pytest.fixture(name="sub_op", params=(operator.sub, rsub)) +def fixture_sub_op(request): + return request.param - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.add, 1, int_max) - def test_raises_for_too_small_result(self, int_min: int, overflow_msg: str): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.sub, int_min, 1) +class TestCalcIntInt: + def test_raises_for_too_large_arg(self, int_max: int, add_op, overflow_msg: str): + add_op(int_max + 1, 1) with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(operator.sub, 1, int_min) + ops.calc_int_int(add_op, int_max + 1, 1) + def test_raises_for_too_small_arg(self, int_min: int, sub_op, overflow_msg: str): + sub_op(int_min - 1, 1) -class TestCalcIntFloat: - @pytest.mark.parametrize( - "op,lval,rval,expected", - ( - (operator.add, 1, 1.0, 2), - (operator.sub, 2, 1.0, 1), - (operator.mul, 1, 2.0, 2), - (operator.truediv, 1, 0.5, 2), - ), - ids=("+", "-", "*", "/"), - ) - def test_arithmetic_ops(self, op, lval: int, rval: float, expected: int): - result = ops.calc_int_float(op, lval, rval) - - assert result == expected - assert isinstance(result, int) 
- - def test_raises_for_too_large_arg( - self, - int_max: int, - float_max: float, - overflow_msg: str, - ): with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_float(operator.add, int_max + 1, 1) + ops.calc_int_int(sub_op, int_min - 1, 1) - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_float(operator.add, 1, float_max + 1) - - def test_raises_for_too_small_arg( - self, - int_min: int, - float_min: float, - overflow_msg: str, - ): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_float(operator.add, int_min - 1, 1) + def test_raises_for_too_large_result(self, int_max: int, add_op, overflow_msg: str): + assert add_op(int_max, 1) == int_max + 1 with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_float(operator.add, 1, float_min - 1) - - def test_raises_for_too_large_result( - self, - int_max: int, - float_max: float, - overflow_msg: str, - ): - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_float(operator.add, int_max, 1) + ops.calc_int_int(add_op, int_max, 1) - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_float(operator.add, 1, float_max) + def test_raises_for_too_small_result(self, int_min: int, sub_op, overflow_msg: str): + assert abs(sub_op(int_min, 1)) == abs(int_min - 1) - @pytest.mark.parametrize( - "value", - ( - pytest.param( - 1024, - marks=pytest.mark.xfail( - reason="TBD", - raises=pytest.fail.Exception, - strict=True, - ), - ), - pytest.param( - 1024.1, - marks=pytest.mark.xfail( - condition=architecture()[0] == "32bit", - reason="overflows earlier", - raises=pytest.fail.Exception, - strict=True, - ), - ), - ), - ) - def test_raises_for_most_too_small_results( - self, - value: float, - int_min: int, - overflow_msg: str, - ): with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_float(operator.sub, int_min, value) + ops.calc_int_int(sub_op, int_min, 1) diff --git a/pandas/tests/tslibs/test_timedeltas.py 
b/pandas/tests/tslibs/test_timedeltas.py index a1ab97d8cb288..8043a77afde9e 100644 --- a/pandas/tests/tslibs/test_timedeltas.py +++ b/pandas/tests/tslibs/test_timedeltas.py @@ -53,27 +53,27 @@ def test_delta_to_nanoseconds_error(): delta_to_nanoseconds(np.int32(3)) -def test_huge_nanoseconds_overflow(): +def test_delta_to_nanoseconds_overflow(): # GH 32402 assert delta_to_nanoseconds(Timedelta(1e10)) == 1e10 assert delta_to_nanoseconds(Timedelta(nanoseconds=1e10)) == 1e10 -class TestArrayToTimedelta64: - def test_array_to_timedelta64_string_with_unit_2d_raises(self): - # check the 'unit is not None and errors != "coerce"' path - # in array_to_timedelta64 raises correctly with 2D values - values = np.array([["1", 2], [3, "4"]], dtype=object) - with pytest.raises(ValueError, match="unit must not be specified"): - array_to_timedelta64(values, unit="s") +def test_array_to_timedelta64_string_with_unit_2d_raises(): + # check the 'unit is not None and errors != "coerce"' path + # in array_to_timedelta64 raises correctly with 2D values + values = np.array([["1", 2], [3, "4"]], dtype=object) + with pytest.raises(ValueError, match="unit must not be specified"): + array_to_timedelta64(values, unit="s") - def test_array_to_timedelta64_non_object_raises(self): - # check we raise, not segfault - values = np.arange(5) - msg = "'values' must have object dtype" - with pytest.raises(TypeError, match=msg): - array_to_timedelta64(values) +def test_array_to_timedelta64_non_object_raises(): + # check we raise, not segfault + values = np.arange(5) + + msg = "'values' must have object dtype" + with pytest.raises(TypeError, match=msg): + array_to_timedelta64(values) @pytest.mark.parametrize("unit", ["s", "ms", "us"]) From 1bc27414c36b185861c8d15fc44064db08feef72 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 10 May 2022 15:58:32 -0700 Subject: [PATCH 16/18] describe current organization, PR feedback --- pandas/_libs/ops.pxd | 4 -- pandas/_libs/ops.pyi | 1 - pandas/_libs/ops.pyx 
| 10 --- pandas/_libs/tslibs/timedeltas.pyx | 33 +++------ pandas/tests/libs/test_ops.py | 67 ------------------- pandas/tests/scalar/timedelta/conftest.py | 10 +++ .../tests/scalar/timedelta/test_arithmetic.py | 17 ++--- .../tests/scalar/timedelta/test_timedelta.py | 14 ++-- pandas/tests/tslibs/test_timedeltas.py | 6 ++ 9 files changed, 38 insertions(+), 124 deletions(-) delete mode 100644 pandas/_libs/ops.pxd delete mode 100644 pandas/tests/libs/test_ops.py create mode 100644 pandas/tests/scalar/timedelta/conftest.py diff --git a/pandas/_libs/ops.pxd b/pandas/_libs/ops.pxd deleted file mode 100644 index 69d6541150345..0000000000000 --- a/pandas/_libs/ops.pxd +++ /dev/null @@ -1,4 +0,0 @@ -from numpy cimport int64_t - - -cpdef int64_t calc_int_int(object op, int64_t a, int64_t b) except? -1 diff --git a/pandas/_libs/ops.pyi b/pandas/_libs/ops.pyi index eb98e5749ec1b..74a6ad87cd279 100644 --- a/pandas/_libs/ops.pyi +++ b/pandas/_libs/ops.pyi @@ -48,4 +48,3 @@ def maybe_convert_bool( *, convert_to_masked_nullable: Literal[True], ) -> tuple[np.ndarray, np.ndarray]: ... -def calc_int_int(op, left: int, right: int) -> int: ... diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 43ebf10c97377..308756e378dde 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -16,7 +16,6 @@ import numpy as np from numpy cimport ( import_array, - int64_t, ndarray, uint8_t, ) @@ -309,12 +308,3 @@ def maybe_convert_bool(ndarray[object] arr, return (arr, None) else: return (result.view(np.bool_), None) - - -@cython.overflowcheck(True) -cpdef int64_t calc_int_int(object op, int64_t a, int64_t b) except? -1: - """ - Calculate op(a, b) and return the result. Raises OverflowError if converting either - operand or the result to an int64_t would overflow. 
- """ - return op(a, b) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 046fe29c0ed5f..26f6b24668e4a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -31,8 +31,6 @@ import_datetime() cimport pandas._libs.tslibs.util as util -from pandas._libs cimport ops -from pandas._libs.missing cimport C_NA from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.conversion cimport ( cast_from_unit, @@ -217,12 +215,11 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: return get_timedelta64_value(ensure_td64ns(delta)) if PyDelta_Check(delta): - microseconds = ( + return ( delta.days * 24 * 3600 * 1_000_000 + delta.seconds * 1_000_000 + delta.microseconds - ) - return calc_int_int(operator.mul, microseconds, 1000) + ) * 1000 raise TypeError(type(delta)) @@ -245,14 +242,20 @@ cdef object ensure_td64ns(object ts): str unitstr td64_unit = get_datetime64_unit(ts) - if td64_unit in (NPY_DATETIMEUNIT.NPY_FR_ns, NPY_DATETIMEUNIT.NPY_FR_GENERIC): + if td64_unit == NPY_DATETIMEUNIT.NPY_FR_ns or td64_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: return ts unitstr = npy_unit_to_abbrev(td64_unit) mult = precision_from_unit(unitstr)[0] - ns = calc_int_int(operator.mul, get_timedelta64_value(ts), mult) - return np.timedelta64(ns, "ns") + with cython.overflowcheck(True): + try: + td64_value = get_timedelta64_value(ts) * mult + except OverflowError as ex: + msg = f"{ts} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + raise OutOfBoundsTimedelta(msg) from ex + + return np.timedelta64(td64_value, "ns") cdef convert_to_timedelta64(object ts, str unit): @@ -673,18 +676,6 @@ def _op_unary_method(func, name): return f -cdef int64_t calc_int_int(object op, object a, object b) except? -1: - """ - Calculate op(a, b), raising if either operand or the result cannot be safely cast - to an int64_t. 
- """ - try: - return ops.calc_int_int(op, a, b) - except OverflowError as ex: - msg = f"outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" - raise OutOfBoundsTimedelta(msg) from ex - - def _binary_op_method_timedeltalike(op, name): # define a binary operation that only works if the other argument is # timedelta like or an array of timedeltalike @@ -907,8 +898,6 @@ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_res if isinstance(value, _Timedelta): return value - if value is C_NA: - raise ValueError("Not supported") try: # if unit == "ns", no need to create an m8[ns] just to read the (same) value back diff --git a/pandas/tests/libs/test_ops.py b/pandas/tests/libs/test_ops.py deleted file mode 100644 index ebb4526d381ca..0000000000000 --- a/pandas/tests/libs/test_ops.py +++ /dev/null @@ -1,67 +0,0 @@ -import operator - -import numpy as np -import pytest - -from pandas._libs import ops - -from pandas.core.ops import ( - radd, - rsub, -) - - -@pytest.fixture(name="int_max", scope="module") -def fixture_int_max() -> int: - return np.iinfo(np.int64).max - - -@pytest.fixture(name="int_min", scope="module") -def fixture_int_min() -> int: - return np.iinfo(np.int64).min - - -@pytest.fixture(name="overflow_msg", scope="module") -def fixture_overflow_msg() -> str: - return "|".join( - ( - "Python int too large to convert to C long", - "int too big to convert", - ) - ) - - -@pytest.fixture(name="add_op", params=(operator.add, radd)) -def fixture_add_op(request): - return request.param - - -@pytest.fixture(name="sub_op", params=(operator.sub, rsub)) -def fixture_sub_op(request): - return request.param - - -class TestCalcIntInt: - def test_raises_for_too_large_arg(self, int_max: int, add_op, overflow_msg: str): - add_op(int_max + 1, 1) - - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(add_op, int_max + 1, 1) - - def test_raises_for_too_small_arg(self, int_min: int, sub_op, overflow_msg: str): - 
sub_op(int_min - 1, 1) - - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(sub_op, int_min - 1, 1) - - def test_raises_for_too_large_result(self, int_max: int, add_op, overflow_msg: str): - assert add_op(int_max, 1) == int_max + 1 - - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(add_op, int_max, 1) - - def test_raises_for_too_small_result(self, int_min: int, sub_op, overflow_msg: str): - assert abs(sub_op(int_min, 1)) == abs(int_min - 1) - - with pytest.raises(OverflowError, match=overflow_msg): - ops.calc_int_int(sub_op, int_min, 1) diff --git a/pandas/tests/scalar/timedelta/conftest.py b/pandas/tests/scalar/timedelta/conftest.py new file mode 100644 index 0000000000000..a2398c06d5e75 --- /dev/null +++ b/pandas/tests/scalar/timedelta/conftest.py @@ -0,0 +1,10 @@ +import re + +import pytest + + +@pytest.fixture(name="td_overflow_msg") +def fixture_td_overflow_msg() -> str: + return re.escape( + "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" + ) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 9870507b39e78..2d8ee06986baa 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -1,6 +1,9 @@ """ -Tests for arithmetic ops between a Timedelta scalar and another scalar, or a Timedelta -scalar and a Array/Index/Series/DataFrame. +Tests of binary ops between a Timedelta scalar and another scalar or a +Array/Index/Series/DataFrame. + +See test_timedelta.py, in this same directory, for tests against the rest of the public +Timedelta API. 
""" from __future__ import annotations @@ -26,6 +29,7 @@ NaT, Timedelta, Timestamp, + compat, offsets, ) import pandas._testing as tm @@ -115,13 +119,6 @@ def fixture_floor_mod_divmod_op(request): return request.param -@pytest.fixture(name="td_overflow_msg") -def fixture_td_overflow_msg() -> str: - return re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - @pytest.fixture(name="invalid_op_msg") def fixture_invalid_op_msg() -> str: messages = ( @@ -447,7 +444,7 @@ def test_numeric(self, ten_days, mul_op, factor, expected, box_with_array): ) tm.assert_equal(result, expected) - @pytest.mark.xfail(reason="no overflow check", raises=AssertionError, strict=True) + @pytest.mark.xfail(compat.IS64, reason="no overflow check", raises=AssertionError) @pytest.mark.parametrize("factor", (1.01, 2), ids=("int", "float")) def test_returns_nat_if_result_overflows(self, mul_op, factor, box_with_array): numeric_box = tm.box_expected((1, factor), box_with_array, transpose=False) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 287435b505d1b..5843b1617f3fe 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -1,6 +1,8 @@ """ -Most Timedelta scalar tests; See test_arithmetic for tests of binary operations with a -Timedelta scalar. +Tests against (most of) the public Timedelta API. + +See test_arithmetic.py, in this same directory, for tests of binary ops between a +Timedelta scalar and another scalar or a Array/Index/Series/DataFrame. 
""" from __future__ import annotations @@ -12,7 +14,6 @@ zip_longest, ) import operator -import re from hypothesis import ( given, @@ -158,13 +159,6 @@ def fixture_td64_min_per_unit(request) -> tuple: return unit, TD64_MIN_PER_UNIT[unit] -@pytest.fixture(name="td_overflow_msg") -def fixture_td_overflow_msg() -> str: - return re.escape( - "outside allowed range [-9223372036854775807ns, 9223372036854775807ns]" - ) - - @pytest.fixture(name="non_nano_reso", params=(7, 8, 9)) def fixture_non_nano_reso(request): """7, 8, 9 correspond to second, millisecond, and microsecond, respectively""" diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py index 8043a77afde9e..df5b4cb5f8df7 100644 --- a/pandas/tests/tslibs/test_timedeltas.py +++ b/pandas/tests/tslibs/test_timedeltas.py @@ -1,3 +1,9 @@ +""" +Tests against the for-internal-use-only functions in pandas._libs.tslibs.timedeltas. + +For tests against the public Timedelta API, see pandas/tests/scalar/timedelta/ +""" + import numpy as np import pytest From f00539fcb2dbb01430de8d0389bf3f10a8d85034 Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Tue, 10 May 2022 18:42:27 -0700 Subject: [PATCH 17/18] use preferred names --- pandas/_libs/tslibs/timedeltas.pyx | 27 ++++++++++--------- .../tests/scalar/timedelta/test_arithmetic.py | 14 +++++----- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 26f6b24668e4a..a4759ca9c09bb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -57,6 +57,7 @@ from pandas._libs.tslibs.np_datetime cimport ( pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, ) +from pandas._libs.util cimport INT64_MAX from pandas._libs.tslibs.np_datetime import OutOfBoundsTimedelta @@ -138,9 +139,6 @@ cdef dict timedelta_abbrevs = { _no_input = object() -TIMEDELTA_MIN_NS = np.iinfo(np.int64).min + 1 -TIMEDELTA_MAX_NS = 
np.iinfo(np.int64).max - # ---------------------------------------------------------------------- # API @@ -215,12 +213,15 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: return get_timedelta64_value(ensure_td64ns(delta)) if PyDelta_Check(delta): - return ( - delta.days * 24 * 3600 * 1_000_000 - + delta.seconds * 1_000_000 - + delta.microseconds - ) * 1000 - + try: + return ( + delta.days * 24 * 3600 * 1_000_000 + + delta.seconds * 1_000_000 + + delta.microseconds + ) * 1000 + except OverflowError as ex: + msg = f"{delta} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" + raise OutOfBoundsTimedelta(msg) from ex raise TypeError(type(delta)) @@ -252,7 +253,7 @@ cdef object ensure_td64ns(object ts): try: td64_value = get_timedelta64_value(ts) * mult except OverflowError as ex: - msg = f"{ts} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + msg = f"{ts} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" raise OutOfBoundsTimedelta(msg) from ex return np.timedelta64(td64_value, "ns") @@ -910,7 +911,7 @@ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_res else: out_value = convert_to_timedelta64(value, in_unit).view(np.int64) except OverflowError as ex: - msg = f"{value} outside allowed range [{TIMEDELTA_MIN_NS}ns, {TIMEDELTA_MAX_NS}ns]" + msg = f"{value} outside allowed range [{NPY_NAT + 1}ns, {INT64_MAX}ns]" raise OutOfBoundsTimedelta(msg) from ex if out_value == NPY_NAT: @@ -1799,6 +1800,6 @@ cdef _broadcast_floordiv_td64( # resolution in ns -Timedelta.min = Timedelta(TIMEDELTA_MIN_NS) -Timedelta.max = Timedelta(TIMEDELTA_MAX_NS) +Timedelta.min = Timedelta(NPY_NAT + 1) +Timedelta.max = Timedelta(INT64_MAX) Timedelta.resolution = Timedelta(nanoseconds=1) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 2d8ee06986baa..3d0dc54636216 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ 
b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -444,16 +444,14 @@ def test_numeric(self, ten_days, mul_op, factor, expected, box_with_array): ) tm.assert_equal(result, expected) + @pytest.mark.skipif(not compat.IS64, reason="flaky") @pytest.mark.xfail(compat.IS64, reason="no overflow check", raises=AssertionError) - @pytest.mark.parametrize("factor", (1.01, 2), ids=("int", "float")) - def test_returns_nat_if_result_overflows(self, mul_op, factor, box_with_array): - numeric_box = tm.box_expected((1, factor), box_with_array, transpose=False) + @pytest.mark.parametrize("factors", ((1, 2), (1, 1.5)), ids=("ints", "floats")) + def test_returns_nat_if_result_overflows(self, mul_op, factors, box_with_array): + numeric_box = tm.box_expected(factors, box_with_array) result = mul_op(Timedelta.max, numeric_box) - expected = tm.box_expected( - (Timedelta.max, NaT), - box_with_array, - transpose=False, - ) + expected = tm.box_expected((Timedelta.max, NaT), box_with_array) + tm.assert_equal(result, expected) @pytest.mark.parametrize("value", (Timedelta.min, Timedelta.max, offsets.Day(1))) From d0967da628c77df0a609c666f17890317ec3252d Mon Sep 17 00:00:00 2001 From: Patrick McKenna Date: Wed, 11 May 2022 11:08:20 -0700 Subject: [PATCH 18/18] perf optimizations --- pandas/_libs/tslibs/timedeltas.pyx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a4759ca9c09bb..5f4a5d144eb8c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -904,10 +904,17 @@ cdef object create_timedelta(object value, str in_unit, NPY_DATETIMEUNIT out_res # if unit == "ns", no need to create an m8[ns] just to read the (same) value back # if unit == "ignore", assume caller wants to invoke an overflow-safe version of # _timedelta_from_value_and_reso, and that any float rounding is acceptable - if (is_integer_object(value) or is_float_object(value)) and in_unit in 
("ns", "ignore"): + if (is_integer_object(value) or is_float_object(value)) and (in_unit == "ns" or in_unit == "ignore"): if util.is_nan(value): return NaT out_value = value + elif is_timedelta64_object(value): + out_value = ensure_td64ns(value).view(np.int64) + elif isinstance(value, str): + if value.startswith(("P", "-P")): + out_value = parse_iso_format_string(value) + else: + out_value = parse_timedelta_string(value) else: out_value = convert_to_timedelta64(value, in_unit).view(np.int64) except OverflowError as ex: