diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 064242f3649f4..d5eb65ec9d35d 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -239,9 +239,23 @@ def test_compare_array(self, data, comparison_op): class BaseUnaryOpsTests(BaseOpsUtil): def test_invert(self, data): ser = pd.Series(data, name="name") - result = ~ser - expected = pd.Series(~data, name="name") - tm.assert_series_equal(result, expected) + try: + # 10 is an arbitrary choice here, just avoid iterating over + # the whole array to trim test runtime + [~x for x in data[:10]] + except TypeError: + # scalars don't support invert -> we don't expect the vectorized + # operation to succeed + with pytest.raises(TypeError): + ~ser + with pytest.raises(TypeError): + ~data + else: + # Note we do not re-use the pointwise result to construct expected + # because python semantics for negating bools are weird see GH#54569 + result = ~ser + expected = pd.Series(~data, name="name") + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) def test_unary_ufunc_dunder_equivalence(self, data, ufunc): diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index a6532a6190467..9b56b10681e15 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -13,22 +13,23 @@ class BaseReduceTests: make sense for numeric/boolean operations. """ - def _supports_reduction(self, obj, op_name: str) -> bool: + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: # Specify if we expect this reduction to succeed. return False - def check_reduce(self, s, op_name, skipna): + def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): # We perform the same operation on the np.float64 data and check # that the results match. Override if you need to cast to something # other than float64. 
- res_op = getattr(s, op_name) + res_op = getattr(ser, op_name) try: - alt = s.astype("float64") - except TypeError: - # e.g. Interval can't cast, so let's cast to object and do + alt = ser.astype("float64") + except (TypeError, ValueError): + # e.g. Interval can't cast (TypeError), StringArray can't cast + # (ValueError), so let's cast to object and do # the reduction pointwise - alt = s.astype(object) + alt = ser.astype(object) exp_op = getattr(alt, op_name) if op_name == "count": @@ -79,53 +80,53 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): op_name = all_boolean_reductions - s = pd.Series(data) + ser = pd.Series(data) - if not self._supports_reduction(s, op_name): + if not self._supports_reduction(ser, op_name): msg = ( "[Cc]annot perform|Categorical is not ordered for operation|" "does not support reduction|" ) with pytest.raises(TypeError, match=msg): - getattr(s, op_name)(skipna=skipna) + getattr(ser, op_name)(skipna=skipna) else: - self.check_reduce(s, op_name, skipna) + self.check_reduce(ser, op_name, skipna) @pytest.mark.filterwarnings("ignore::RuntimeWarning") @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions - s = pd.Series(data) + ser = pd.Series(data) - if not self._supports_reduction(s, op_name): + if not self._supports_reduction(ser, op_name): msg = ( "[Cc]annot perform|Categorical is not ordered for operation|" "does not support reduction|" ) with pytest.raises(TypeError, match=msg): - getattr(s, op_name)(skipna=skipna) + getattr(ser, op_name)(skipna=skipna) else: # min/max with empty produce numpy warnings - self.check_reduce(s, op_name, skipna) + self.check_reduce(ser, op_name, skipna) @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_frame(self, data, 
all_numeric_reductions, skipna): op_name = all_numeric_reductions - s = pd.Series(data) - if not is_numeric_dtype(s.dtype): + ser = pd.Series(data) + if not is_numeric_dtype(ser.dtype): pytest.skip("not numeric dtype") if op_name in ["count", "kurt", "sem"]: pytest.skip(f"{op_name} not an array method") - if not self._supports_reduction(s, op_name): + if not self._supports_reduction(ser, op_name): pytest.skip(f"Reduction {op_name} not supported for this dtype") - self.check_reduce_frame(s, op_name, skipna) + self.check_reduce_frame(ser, op_name, skipna) # TODO: deprecate BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests @@ -135,7 +136,7 @@ class BaseNoReduceTests(BaseReduceTests): class BaseNumericReduceTests(BaseReduceTests): # For backward compatibility only, this only runs the numeric reductions - def _supports_reduction(self, obj, op_name: str) -> bool: + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: if op_name in ["any", "all"]: pytest.skip("These are tested in BaseBooleanReduceTests") return True @@ -143,7 +144,7 @@ def _supports_reduction(self, obj, op_name: str) -> bool: class BaseBooleanReduceTests(BaseReduceTests): # For backward compatibility only, this only runs the numeric reductions - def _supports_reduction(self, obj, op_name: str) -> bool: + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: if op_name not in ["any", "all"]: pytest.skip("These are tested in BaseNumericReduceTests") return True diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index baa056550624f..2f274354f0da0 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -71,15 +71,15 @@ def _get_expected_exception( ) -> type[Exception] | None: return None - def _supports_reduction(self, obj, op_name: str) -> bool: + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: return True - def 
check_reduce(self, s, op_name, skipna): + def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): if op_name == "count": - return super().check_reduce(s, op_name, skipna) + return super().check_reduce(ser, op_name, skipna) else: - result = getattr(s, op_name)(skipna=skipna) - expected = getattr(np.asarray(s), op_name)() + result = getattr(ser, op_name)(skipna=skipna) + expected = getattr(np.asarray(ser), op_name)() tm.assert_almost_equal(result, expected) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request): @@ -216,12 +216,6 @@ def test_series_repr(self, data): assert data.dtype.name in repr(ser) assert "Decimal: " in repr(ser) - @pytest.mark.xfail( - reason="Looks like the test (incorrectly) implicitly assumes int/bool dtype" - ) - def test_invert(self, data): - super().test_invert(data) - @pytest.mark.xfail(reason="Inconsistent array-vs-scalar behavior") @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) def test_unary_ufunc_dunder_equivalence(self, data, ufunc): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4c05049ddfcf5..35184450e9c11 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -401,8 +401,8 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques self.check_accumulate(ser, op_name, skipna) - def _supports_reduction(self, obj, op_name: str) -> bool: - dtype = tm.get_dtype(obj) + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + dtype = ser.dtype # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has # no attribute "pyarrow_dtype" pa_dtype = dtype.pyarrow_dtype # type: ignore[union-attr] @@ -445,20 +445,25 @@ def _supports_reduction(self, obj, op_name: str) -> bool: return True - def check_reduce(self, ser, op_name, skipna): - pa_dtype = ser.dtype.pyarrow_dtype - if op_name == "count": - result = getattr(ser, op_name)() + def check_reduce(self, 
ser: pd.Series, op_name: str, skipna: bool): + # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has no + # attribute "pyarrow_dtype" + pa_dtype = ser.dtype.pyarrow_dtype # type: ignore[union-attr] + if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): + alt = ser.astype("Float64") else: - result = getattr(ser, op_name)(skipna=skipna) + # TODO: in the opposite case, aren't we testing... nothing? For + # e.g. date/time dtypes trying to calculate 'expected' by converting + # to object will raise for mean, std etc + alt = ser - if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): - ser = ser.astype("Float64") # TODO: in the opposite case, aren't we testing... nothing? if op_name == "count": - expected = getattr(ser, op_name)() + result = getattr(ser, op_name)() + expected = getattr(alt, op_name)() else: - expected = getattr(ser, op_name)(skipna=skipna) + result = getattr(ser, op_name)(skipna=skipna) + expected = getattr(alt, op_name)(skipna=skipna) tm.assert_almost_equal(result, expected) @pytest.mark.parametrize("skipna", [True, False]) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 3ceb32f181986..79b8e9ddbf8f5 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -179,12 +179,6 @@ def _compare_other(self, s, data, op, other): def test_array_repr(self, data, size): super().test_array_repr(data, size) - @pytest.mark.xfail( - reason="Looks like the test (incorrectly) implicitly assumes int/bool dtype" - ) - def test_invert(self, data): - super().test_invert(data) - @pytest.mark.xfail(reason="TBD") @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 66b25abb55961..f37ac4b289852 100644 --- a/pandas/tests/extension/test_interval.py +++ 
b/pandas/tests/extension/test_interval.py @@ -13,6 +13,10 @@ be added to the array-specific tests in `pandas/tests/arrays/`. """ +from __future__ import annotations + +from typing import TYPE_CHECKING + import numpy as np import pytest @@ -22,6 +26,9 @@ from pandas.core.arrays import IntervalArray from pandas.tests.extension import base +if TYPE_CHECKING: + import pandas as pd + def make_data(): N = 100 @@ -73,7 +80,7 @@ def data_for_grouping(): class TestIntervalArray(base.ExtensionTests): divmod_exc = TypeError - def _supports_reduction(self, obj, op_name: str) -> bool: + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: return op_name in ["min", "max"] @pytest.mark.xfail( @@ -89,12 +96,6 @@ def test_EA_types(self, engine, data): with pytest.raises(NotImplementedError, match=expected_msg): super().test_EA_types(engine, data) - @pytest.mark.xfail( - reason="Looks like the test (incorrectly) implicitly assumes int/bool dtype" - ) - def test_invert(self, data): - super().test_invert(data) - # TODO: either belongs in tests.arrays.interval or move into base tests. 
def test_fillna_non_scalar_raises(data_missing): diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index bed406e902483..7efb8fbad8cd1 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -238,8 +238,8 @@ def test_combine_le(self, data_repeated): self._combine_le_expected_dtype = object super().test_combine_le(data_repeated) - def _supports_reduction(self, obj, op_name: str) -> bool: - if op_name in ["any", "all"] and tm.get_dtype(obj).kind != "b": + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + if op_name in ["any", "all"] and ser.dtype.kind != "b": pytest.skip(reason="Tested in tests/reductions/test_reductions.py") return True @@ -256,12 +256,16 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): if op_name in ["min", "max"]: cmp_dtype = "bool" + # TODO: prod with integer dtypes does *not* match the result we would + # get if we used object for cmp_dtype. In that case the object result + # is a large integer while the non-object case overflows and returns 0 + alt = ser.dropna().astype(cmp_dtype) if op_name == "count": result = getattr(ser, op_name)() - expected = getattr(ser.dropna().astype(cmp_dtype), op_name)() + expected = getattr(alt, op_name)() else: result = getattr(ser, op_name)(skipna=skipna) - expected = getattr(ser.dropna().astype(cmp_dtype), op_name)(skipna=skipna) + expected = getattr(alt, op_name)(skipna=skipna) if not skipna and ser.isna().any() and op_name not in ["any", "all"]: expected = pd.NA tm.assert_almost_equal(result, expected) @@ -350,15 +354,6 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool): else: raise NotImplementedError(f"{op_name} not supported") - def test_invert(self, data, request): - if data.dtype.kind == "f": - mark = pytest.mark.xfail( - reason="Looks like the base class test implicitly assumes " - "boolean/integer dtypes" - ) - request.node.add_marker(mark) -
super().test_invert(data) - class Test2DCompat(base.Dim2CompatTests): pass diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index a54729de57a97..542e938d1a40a 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -302,15 +302,19 @@ class TestPrinting(BaseNumPyTests, base.BasePrintingTests): class TestReduce(BaseNumPyTests, base.BaseReduceTests): - def _supports_reduction(self, obj, op_name: str) -> bool: - if tm.get_dtype(obj).kind == "O": + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + if ser.dtype.kind == "O": return op_name in ["sum", "min", "max", "any", "all"] return True - def check_reduce(self, s, op_name, skipna): - res_op = getattr(s, op_name) + def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): + res_op = getattr(ser, op_name) # avoid coercing int -> float. Just cast to the actual numpy type. - exp_op = getattr(s.astype(s.dtype._dtype), op_name) + # error: Item "ExtensionDtype" of "dtype[Any] | ExtensionDtype" has + # no attribute "numpy_dtype" + cmp_dtype = ser.dtype.numpy_dtype # type: ignore[union-attr] + alt = ser.astype(cmp_dtype) + exp_op = getattr(alt, op_name) if op_name == "count": result = res_op() expected = exp_op() diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 6597ff84e3ca4..c3440b3bdb318 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -157,16 +157,8 @@ def test_fillna_no_op_returns_copy(self, data): class TestReduce(base.BaseReduceTests): - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): - op_name = all_numeric_reductions - - if op_name in ["min", "max"]: - return None - - ser = pd.Series(data) - with pytest.raises(TypeError): - getattr(ser, op_name)(skipna=skipna) + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + 
return op_name in ["min", "max"] class TestMethods(base.BaseMethodsTests):