diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 96f37bd47e10c..399b2db1c4c65 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -4,6 +4,7 @@ from datetime import datetime from decimal import Decimal from functools import wraps +from inspect import isclass import operator import os import re @@ -245,19 +246,21 @@ def box_expected(expected, box_cls, transpose=True): Parameters ---------- expected : np.ndarray, Index, Series - box_cls : {Index, Series, DataFrame} + box_cls : {Index, Series, DataFrame, pd.array, ExtensionArray} Returns ------- subclass of box_cls """ - if box_cls is pd.array: + if box_cls is pd.array or ( + isclass(box_cls) and issubclass(box_cls, ExtensionArray) + ): if isinstance(expected, RangeIndex): # pd.array would return an IntegerArray expected = PandasArray(np.asarray(expected._values)) else: expected = pd.array(expected) - elif box_cls is Index: + elif isclass(box_cls) and issubclass(box_cls, Index): expected = Index._with_infer(expected) elif box_cls is Series: expected = Series(expected) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index bb7949c9f08e2..53c7824562dbb 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1,9 +1,16 @@ # Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. 
+from __future__ import annotations + +from contextlib import ( + AbstractContextManager, + nullcontext, +) from datetime import ( datetime, timedelta, ) +from functools import partial import numpy as np import pytest @@ -31,12 +38,28 @@ Int64Index, UInt64Index, ) +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, assert_invalid_comparison, get_upcast_box, ) +TIMEDELTA_OVERFLOW_MSG = "|".join( + [ + "int too big to convert", + "Python int too large to convert to C long", + "Overflow in int64 addition", + ] +) + + +does_not_raise = nullcontext +td_overflow_error = partial(pytest.raises, OverflowError, match=TIMEDELTA_OVERFLOW_MSG) + def assert_dtype(obj, expected_dtype): """ @@ -59,6 +82,56 @@ def get_expected_name(box, names): return exname +def get_result_type(td_type, dt_type): + """ + Expected result for add/sub between Timestamp-valued and Timedelta-valued boxes. + """ + result_types = { + (DatetimeArray, TimedeltaArray): DatetimeArray, + (DatetimeArray, TimedeltaIndex): DatetimeIndex, + (DatetimeArray, Series): Series, + (DatetimeArray, DataFrame): DataFrame, + (DatetimeIndex, TimedeltaArray): DatetimeIndex, + (DatetimeIndex, TimedeltaIndex): DatetimeIndex, + (DatetimeIndex, Series): Series, + (DatetimeIndex, DataFrame): DataFrame, + (Series, TimedeltaArray): Series, + (Series, TimedeltaIndex): Series, + (Series, Series): Series, + (Series, DataFrame): DataFrame, + } + + return result_types.get((dt_type, td_type), DataFrame) + + +@pytest.fixture(name="td_max_box") +def fixture_td_max_box( + box_with_array, +) -> TimedeltaArray | TimedeltaIndex | Series | DataFrame: + """ + A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to + Timedelta.max.
+ """ + return tm.box_expected((Timedelta.max,), box_with_array) + + +@pytest.fixture( + name="positive_td_box", + params=[Timedelta(1), Timedelta(1024), Timedelta.max], + ids=["1ns", "1024ns", "td_max"], +) +def fixture_positive_td_box( + request, + box_with_array, +) -> TimedeltaArray | TimedeltaIndex | Series | DataFrame: + """ + A 1-elem ExtensionArray/Index/Series, or 2x1 DataFrame, w/ all elements set to the + same positive Timedelta. + """ + value = (request.param,) + return tm.box_expected(value, box_with_array) + + # ------------------------------------------------------------------ # Timedelta64[ns] dtype Comparisons @@ -320,8 +393,6 @@ def test_subtraction_ops(self): msg = "cannot subtract a datelike from a TimedeltaArray" with pytest.raises(TypeError, match=msg): tdi - dt - with pytest.raises(TypeError, match=msg): - tdi - dti msg = r"unsupported operand type\(s\) for -" with pytest.raises(TypeError, match=msg): @@ -442,23 +513,6 @@ def _check(result, expected): expected = tm.box_expected(expected, box_with_array) tm.assert_equal(result, expected) - def test_dti_tdi_numeric_ops(self): - # These are normally union/diff set-like ops - tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") - dti = pd.date_range("20130101", periods=3, name="bar") - - result = tdi - tdi - expected = TimedeltaIndex(["0 days", NaT, "0 days"], name="foo") - tm.assert_index_equal(result, expected) - - result = tdi + tdi - expected = TimedeltaIndex(["2 days", NaT, "4 days"], name="foo") - tm.assert_index_equal(result, expected) - - result = dti - tdi # name will be reset - expected = DatetimeIndex(["20121231", NaT, "20130101"]) - tm.assert_index_equal(result, expected) - def test_addition_ops(self): # with datetimes/timedelta and tdi/dti tdi = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") @@ -497,14 +551,6 @@ def test_addition_ops(self): # this is a union!
# pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) - result = tdi + dti # name will be reset - expected = DatetimeIndex(["20130102", NaT, "20130105"]) - tm.assert_index_equal(result, expected) - - result = dti + tdi # name will be reset - expected = DatetimeIndex(["20130102", NaT, "20130105"]) - tm.assert_index_equal(result, expected) - result = dt + td expected = Timestamp("20130102") assert result == expected @@ -555,25 +601,6 @@ def test_timedelta_tick_arithmetic(self): result3 = result3._with_freq(None) tm.assert_index_equal(result2, result3) - def test_tda_add_sub_index(self): - # Check that TimedeltaArray defers to Index on arithmetic ops - tdi = TimedeltaIndex(["1 days", NaT, "2 days"]) - tda = tdi.array - - dti = pd.date_range("1999-12-31", periods=3, freq="D") - - result = tda + dti - expected = tdi + dti - tm.assert_index_equal(result, expected) - - result = tda + tdi - expected = tdi + tdi - tm.assert_index_equal(result, expected) - - result = tda - tdi - expected = tdi - tdi - tm.assert_index_equal(result, expected) - def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): # Result should be cast back to DatetimeArray box = box_with_array @@ -686,46 +713,6 @@ def test_tdarr_add_timestamp_nat_masking(self, box_with_array, str_ts): else: assert res[1] is NaT - def test_tdi_add_overflow(self): - # See GH#14068 - # preliminary test scalar analogue of vectorized tests below - # TODO: Make raised error message more informative and test - with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - pd.to_timedelta(106580, "D") + Timestamp("2000") - with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - Timestamp("2000") + pd.to_timedelta(106580, "D") - - _NaT = NaT.value + 1 - msg = "Overflow in int64 addition" - with pytest.raises(OverflowError, match=msg): - pd.to_timedelta([106580], "D") + Timestamp("2000") - with pytest.raises(OverflowError, match=msg): - Timestamp("2000") + 
pd.to_timedelta([106580], "D") - with pytest.raises(OverflowError, match=msg): - pd.to_timedelta([_NaT]) - Timedelta("1 days") - with pytest.raises(OverflowError, match=msg): - pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days") - with pytest.raises(OverflowError, match=msg): - ( - pd.to_timedelta([_NaT, "5 days", "1 hours"]) - - pd.to_timedelta(["7 seconds", _NaT, "4 hours"]) - ) - - # These should not overflow! - exp = TimedeltaIndex([NaT]) - result = pd.to_timedelta([NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex(["4 days", NaT]) - result = pd.to_timedelta(["5 days", NaT]) - Timedelta("1 days") - tm.assert_index_equal(result, exp) - - exp = TimedeltaIndex([NaT, NaT, "5 hours"]) - result = pd.to_timedelta([NaT, "5 days", "1 hours"]) + pd.to_timedelta( - ["7 seconds", NaT, "4 hours"] - ) - tm.assert_index_equal(result, exp) - class TestTimedeltaArraylikeAddSubOps: # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ @@ -763,11 +750,6 @@ def test_timedelta_ops_with_missing_values(self): actual = scalar2 - scalar1 assert actual == scalar1 - actual = s1 + s1 - tm.assert_series_equal(actual, s2) - actual = s2 - s1 - tm.assert_series_equal(actual, s1) - actual = s1 + scalar1 tm.assert_series_equal(actual, s2) actual = scalar1 + s1 @@ -801,20 +783,6 @@ def test_timedelta_ops_with_missing_values(self): actual = s2 - NaT tm.assert_series_equal(actual, sn) - actual = s1 + df1 - tm.assert_frame_equal(actual, df2) - actual = s2 - df1 - tm.assert_frame_equal(actual, df1) - actual = df1 + s1 - tm.assert_frame_equal(actual, df2) - actual = df2 - s1 - tm.assert_frame_equal(actual, df1) - - actual = df1 + df1 - tm.assert_frame_equal(actual, df2) - actual = df2 - df1 - tm.assert_frame_equal(actual, df1) - actual = df1 + scalar1 tm.assert_frame_equal(actual, df2) actual = df2 - scalar1 @@ -1038,37 +1006,6 @@ def test_td64arr_add_datetime64_nat(self, box_with_array): tm.assert_equal(tdser + other, expected) 
tm.assert_equal(other + tdser, expected) - def test_td64arr_sub_dt64_array(self, box_with_array): - dti = pd.date_range("2016-01-01", periods=3) - tdi = TimedeltaIndex(["-1 Day"] * 3) - dtarr = dti.values - expected = DatetimeIndex(dtarr) - tdi - - tdi = tm.box_expected(tdi, box_with_array) - expected = tm.box_expected(expected, box_with_array) - - msg = "cannot subtract a datelike from" - with pytest.raises(TypeError, match=msg): - tdi - dtarr - - # TimedeltaIndex.__rsub__ - result = dtarr - tdi - tm.assert_equal(result, expected) - - def test_td64arr_add_dt64_array(self, box_with_array): - dti = pd.date_range("2016-01-01", periods=3) - tdi = TimedeltaIndex(["-1 Day"] * 3) - dtarr = dti.values - expected = DatetimeIndex(dtarr) + tdi - - tdi = tm.box_expected(tdi, box_with_array) - expected = tm.box_expected(expected, box_with_array) - - result = tdi + dtarr - tm.assert_equal(result, expected) - result = dtarr + tdi - tm.assert_equal(result, expected) - # ------------------------------------------------------------------ # Invalid __add__/__sub__ operations @@ -1173,27 +1110,6 @@ def test_td64arr_addsub_integer_array_no_freq(self, box_with_array): # ------------------------------------------------------------------ # Operations with timedelta-like others - def test_td64arr_add_sub_td64_array(self, box_with_array): - box = box_with_array - dti = pd.date_range("2016-01-01", periods=3) - tdi = dti - dti.shift(1) - tdarr = tdi.values - - expected = 2 * tdi - tdi = tm.box_expected(tdi, box) - expected = tm.box_expected(expected, box) - - result = tdi + tdarr - tm.assert_equal(result, expected) - result = tdarr + tdi - tm.assert_equal(result, expected) - - expected_sub = 0 * tdi - result = tdi - tdarr - tm.assert_equal(result, expected_sub) - result = tdarr - tdi - tm.assert_equal(result, expected_sub) - def test_td64arr_add_sub_tdi(self, box_with_array, names): # GH#17250 make sure result dtype is correct # GH#19043 make sure names are propagated correctly @@ -2089,18 
+2005,142 @@ def test_td64arr_pow_invalid(self, scalar_td, box_with_array): td1**scalar_td -def test_add_timestamp_to_timedelta(): - # GH: 35897 - timestamp = Timestamp("2021-01-01") - result = timestamp + timedelta_range("0s", "1s", periods=31) - expected = DatetimeIndex( +class TestAddSub: + """ + Add/sub between 2 Timedelta-valued ExtensionArrays/Indexes/Series/DataFrames. + """ + + def test_add_raises_if_result_would_overflow( + self, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + positive_td_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + ): + with td_overflow_error(): + td_max_box + positive_td_box + + with td_overflow_error(): + positive_td_box + td_max_box + + @pytest.mark.parametrize( + ["positive_td", "expected_exs"], [ - timestamp - + ( - pd.to_timedelta("0.033333333s") * i - + pd.to_timedelta("0.000000001s") * divmod(i, 3)[0] - ) - for i in range(31) - ] + # can't use positive_td_box fixture b/c errors vary + (Timedelta(1), does_not_raise()), + (Timedelta(2), td_overflow_error()), + (Timedelta.max, td_overflow_error()), + ], ) - tm.assert_index_equal(result, expected) + def test_sub_raises_if_result_would_overflow( + self, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + positive_td: Timedelta, + expected_exs: AbstractContextManager, + box_with_array, + ): + positive_td_box = tm.box_expected((positive_td,), box_with_array) + td_min_box = -1 * td_max_box + + with expected_exs: + td_min_box - positive_td_box + + with expected_exs: + -1 * positive_td_box - td_max_box + + +class TestNumericScalarMulDiv: + """ + Operations on Timedelta-valued ExtensionArray/Index/Series/DataFrame and a + numeric scalar.
+ """ + + @pytest.mark.xfail(reason="Not implemented", raises=pytest.fail.Exception) + def test_scalar_mul_raises_if_result_would_overflow( + self, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + ): + with td_overflow_error(): + td_max_box * 1.01 + + with td_overflow_error(): + 1.01 * td_max_box + + +class TestAddSubTimestampBox: + """ + Add/sub between Timedelta-valued and Timestamp-valued + ExtensionArrays/Indexes/Series/DataFrames. + """ + + def test_add(self, box_with_array, box_with_array2): + # GH: 35897 + td_box = tm.box_expected( + (Timedelta(hours=3), Timedelta(hours=3), NaT, NaT), + box_with_array, + ) + dt_box = tm.box_expected( + (Timestamp(2020, 1, 2), NaT, Timestamp(2020, 1, 2), NaT), + box_with_array2, + ) + expected_type = get_result_type(type(td_box), type(dt_box)) + expected = tm.box_expected( + (Timestamp(2020, 1, 2, 3), NaT, NaT, NaT), + expected_type, + ) + result = dt_box + td_box + + assert isinstance(result, expected_type) + tm.assert_equal(result, expected) + + def test_add_raises_if_result_would_overflow( + self, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + box_with_array, + ): + dt_max_box = tm.box_expected((Timestamp.max,), box_with_array) + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join([TIMEDELTA_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) + + with pytest.raises(ex, match=msg): + td_max_box + dt_max_box + + with pytest.raises(ex, match=msg): + dt_max_box + td_max_box + + def test_sub(self, box_with_array, box_with_array2): + td_box = tm.box_expected( + (Timedelta(hours=3), Timedelta(hours=3), NaT, NaT), + box_with_array, + ) + dt_box = tm.box_expected( + (Timestamp(2020, 1, 2, 6), NaT, Timestamp(2020, 1, 2, 6), NaT), + box_with_array2, + ) + expected_type = get_result_type(type(td_box), type(dt_box)) + expected = tm.box_expected( + (Timestamp(2020, 1, 2, 3), NaT, NaT, NaT), + expected_type, + ) + result = dt_box - td_box + + assert isinstance(result, expected_type) + 
tm.assert_equal(result, expected) + + def test_sub_dt_box_from_td_box_raises(self, box_with_array, box_with_array2): + td_box = tm.box_expected((Timedelta(hours=3),), box_with_array) + dt_box = tm.box_expected((Timestamp(2020, 1, 2),), box_with_array2) + msg = "cannot subtract a datelike from a TimedeltaArray" + + with pytest.raises(TypeError, match=msg): + td_box - dt_box + + def test_sub_raises_if_result_would_overflow( + self, + td_max_box: TimedeltaArray | TimedeltaIndex | Series | DataFrame, + box_with_array, + ): + dt_min_box = tm.box_expected((Timestamp.min,), box_with_array) + ex = (OutOfBoundsDatetime, OverflowError) + msg = "|".join([TIMEDELTA_OVERFLOW_MSG, "Out of bounds nanosecond timestamp"]) + + with pytest.raises(ex, match=msg): + dt_min_box - td_max_box diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 7677b8950c7a3..5c77ed998981e 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1,7 +1,11 @@ +from __future__ import annotations + from datetime import ( datetime, timedelta, ) +from functools import partial +import os import numpy as np import pytest @@ -28,6 +32,45 @@ import pandas._testing as tm from pandas.core import nanops +TD64_VALUE_ERROR_MSG = "overflow in timedelta operation" +TD64_OVERFLOW_MSG = "|".join( + [ + "int too big to convert", + "Python int too large to convert to C long", + "Overflow in int64 addition", + ] +) + + +# TODO: more robust platform/env detection? 
+on_arm = os.environ.get("CIRCLECI") == "true" +using_array_data_mgr = os.environ.get("PANDAS_DATA_MANAGER") == "array" + +xfail_does_not_raise = partial( + pytest.mark.xfail, + reason="should raise exception", + raises=pytest.fail.Exception, + strict=True, +) +xfail_returns_nat = partial( + pytest.mark.xfail, + reason="returns NaT", + raises=AssertionError, + strict=True, +) +xfail_ints_wrap = partial( + pytest.mark.xfail, + reason="ints wrap", + raises=AssertionError, + strict=True, +) +xfail_value_overflow_error = partial( + pytest.mark.xfail, + reason="unclear", + raises=(ValueError, OverflowError), + strict=True, +) + def get_objs(): indexes = [ @@ -1527,3 +1570,136 @@ def test_multimode_complex(self, array, expected, dtype): # Complex numbers are sorted by their magnitude result = Series(array, dtype=dtype).mode() tm.assert_series_equal(result, expected) + + +class TestTimedelta: + """ + For Timedelta-valued ExtensionArrays/Indexes/Series/DataFrames. + """ + + @pytest.mark.parametrize( + "value", + [Timedelta(-(10**15) + 1), Timedelta(10**15 + 1)], + ) + def test_single_elem_sum_retains_ns_precision_over_expected_range( + self, + value: Timedelta, + index_or_series_or_array, + ): + td_arraylike = tm.box_expected((value,), index_or_series_or_array) + result = td_arraylike.sum() + + assert result == value + + @pytest.mark.parametrize( + "value", + [ + Timedelta.min + Timedelta(512), + Timedelta(-(10**16) - 1), + Timedelta(10**16 + 1), + Timedelta.max - Timedelta(512), + ], + ) + def test_single_elem_sum_loses_ns_precision_if_float_conversion_rounds( + self, + value: Timedelta, + index_or_series_or_array, + ): + """ + The computation involves int->float conversion, so there can be loss of + precision. 
+ """ + td_arraylike = tm.box_expected((value,), index_or_series_or_array) + result = td_arraylike.sum() + + assert result != value + assert np.isclose(result.value, value.value) + + @xfail_returns_nat(condition=not on_arm) + @pytest.mark.parametrize( + "value", + ( + pytest.param(Timedelta.min, marks=xfail_returns_nat(condition=on_arm)), + pytest.param( + Timedelta.min + Timedelta(511), + marks=xfail_returns_nat(condition=on_arm), + ), + pytest.param( + Timedelta.max - Timedelta(511), + marks=pytest.mark.xfail( + on_arm, + reason="returns Timedelta.max", + raises=AssertionError, + ), + ), + pytest.param(Timedelta.max), + ), + ) + def test_single_elem_sum_works_near_boundaries( + self, + value: Timedelta, + index_or_series_or_array, + ): + td_arraylike = tm.box_expected((value,), index_or_series_or_array) + result = td_arraylike.sum() + + assert result == value + + @pytest.mark.parametrize( + "values", + ( + (Timedelta.min, Timedelta.min), + (Timedelta.min, Timedelta(-1025)), + pytest.param( + (Timedelta.min, Timedelta(-1024)), + marks=xfail_does_not_raise(), + ), + pytest.param((Timedelta.min, Timedelta(-1)), marks=xfail_does_not_raise()), + pytest.param((Timedelta.max, Timedelta(1)), marks=xfail_does_not_raise()), + pytest.param( + (Timedelta.max, Timedelta(1024)), + marks=xfail_does_not_raise(), + ), + (Timedelta.max, Timedelta(1025)), + (Timedelta.max, Timedelta.max), + ), + ) + def test_arraylike_sum_usually_raises_for_overflow( + self, + values: tuple[Timedelta], + index_or_series_or_array, + ): + td_arraylike = tm.box_expected(values, index_or_series_or_array) + with pytest.raises(ValueError, match=TD64_VALUE_ERROR_MSG): + td_arraylike.sum() + + @pytest.mark.parametrize( + "values", + ( + pytest.param( + (Timedelta.min,) * 2, + marks=xfail_value_overflow_error(condition=using_array_data_mgr), + ), + (Timedelta.min, Timedelta(-1)), + pytest.param( + (Timedelta.max, Timedelta(1)), marks=xfail_ints_wrap(condition=on_arm) + ), + pytest.param( + (Timedelta.max,) 
* 2, + marks=( + xfail_ints_wrap(condition=on_arm), + xfail_value_overflow_error(condition=using_array_data_mgr), + ), + ), + ), + ids=("double_td_min", "over_by_-1ns", "over_by_1ns", "double_td_max"), + ) + def test_df_sum_usually_returns_nat_for_overflows(self, values: list[Timedelta]): + """ + Special case behavior for some values, for some platforms/configs. + """ + td64_df = tm.box_expected(values, DataFrame, transpose=False) + result = td64_df.sum() + expected = Series(NaT, index=[0], dtype="timedelta64[ns]") + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index a552d9d84329f..47cd2a53d89bb 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -51,31 +51,6 @@ def test_td64_sum_empty(skipna): assert result == pd.Timedelta(0) -def test_td64_summation_overflow(): - # GH#9442 - ser = Series(pd.date_range("20130101", periods=100000, freq="H")) - ser[0] += pd.Timedelta("1s 1ms") - - # mean - result = (ser - ser.min()).mean() - expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum()) - - # the computation is converted to float so - # might be some loss of precision - assert np.allclose(result.value / 1000, expected.value / 1000) - - # sum - msg = "overflow in timedelta operation" - with pytest.raises(ValueError, match=msg): - (ser - ser.min()).sum() - - s1 = ser[0:10000] - with pytest.raises(ValueError, match=msg): - (s1 - s1.min()).sum() - s2 = ser[0:1000] - (s2 - s2.min()).sum() - - def test_prod_numpy16_bug(): ser = Series([1.0, 1.0, 1.0], index=range(3)) result = ser.prod()