Skip to content

REF: simplify extension reduction tests #54394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/tests/extension/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class TestMyDtype(BaseDtypeTests):
BaseBooleanReduceTests,
BaseNoReduceTests,
BaseNumericReduceTests,
BaseReduceTests,
)
from pandas.tests.extension.base.reshaping import BaseReshapingTests # noqa: F401
from pandas.tests.extension.base.setitem import BaseSetitemTests # noqa: F401
83 changes: 49 additions & 34 deletions pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ class BaseReduceTests(BaseExtensionTests):
make sense for numeric/boolean operations.
"""

def _supports_reduction(self, obj, op_name: str) -> bool:
# Specify if we expect this reduction to succeed.
return False

def check_reduce(self, s, op_name, skipna):
# We perform the same operation on the np.float64 data and check
# that the results match. Override if you need to cast to something
Expand Down Expand Up @@ -66,47 +70,42 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):

tm.assert_extension_array_equal(result1, expected)


class BaseNoReduceTests(BaseReduceTests):
"""we don't define any reductions"""

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
s = pd.Series(data)

msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)

with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
op_name = all_boolean_reductions
s = pd.Series(data)

msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)
if not self._supports_reduction(s, op_name):
msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)

with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)
with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)

else:
self.check_reduce(s, op_name, skipna)

class BaseNumericReduceTests(BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_numeric_reductions, skipna):
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
s = pd.Series(data)

# min/max with empty produce numpy warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
self.check_reduce(s, op_name, skipna)
if not self._supports_reduction(s, op_name):
msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)

with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)

else:
# min/max with empty produce numpy warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
self.check_reduce(s, op_name, skipna)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
Expand All @@ -118,12 +117,28 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna):
if op_name in ["count", "kurt", "sem"]:
pytest.skip(f"{op_name} not an array method")

if not self._supports_reduction(s, op_name):
pytest.skip(f"Reduction {op_name} not supported for this dtype")

self.check_reduce_frame(s, op_name, skipna)


# TODO: deprecate BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests
class BaseNoReduceTests(BaseReduceTests):
    """
    Reduction tests for an array type that defines no reductions.

    The class body adds nothing of its own; everything is inherited
    unchanged from BaseReduceTests.
    """


class BaseNumericReduceTests(BaseReduceTests):
    """Backward-compatibility class that runs only the numeric reductions."""

    def _supports_reduction(self, obj, op_name: str) -> bool:
        # "any" and "all" are skipped outright instead of being reported as
        # unsupported; every other reduction is declared supported.
        boolean_ops = ["any", "all"]
        if op_name in boolean_ops:
            pytest.skip("These are tested in BaseBooleanReduceTests")
        return True


class BaseBooleanReduceTests(BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_boolean_reductions, skipna):
op_name = all_boolean_reductions
s = pd.Series(data)
self.check_reduce(s, op_name, skipna)
# For backward compatibility only, this only runs the boolean reductions
def _supports_reduction(self, obj, op_name: str) -> bool:
if op_name not in ["any", "all"]:
pytest.skip("These are tested in BaseNumericReduceTests")
return True
9 changes: 4 additions & 5 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ def test_fillna_series_method(self, data_missing, fillna_method):


class Reduce:
def _supports_reduction(self, obj, op_name: str) -> bool:
return True

def check_reduce(self, s, op_name, skipna):
if op_name in ["median", "skew", "kurt", "sem"]:
msg = r"decimal does not support the .* operation"
Expand Down Expand Up @@ -183,7 +186,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
tm.assert_series_equal(result, expected)


class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
class TestReduce(Reduce, base.BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
Expand All @@ -194,10 +197,6 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna):
return super().test_reduce_frame(data, all_numeric_reductions, skipna)


class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
pass


class TestMethods(base.BaseMethodsTests):
def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
warn = FutureWarning if not using_copy_on_write else None
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def test_fillna_frame(self):
unhashable = pytest.mark.xfail(reason="Unhashable")


class TestReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
pass


Expand Down
77 changes: 39 additions & 38 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,10 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
self.check_accumulate(ser, op_name, skipna)


class TestBaseNumericReduce(base.BaseNumericReduceTests):
class TestReduce(base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
return True

def check_reduce(self, ser, op_name, skipna):
pa_dtype = ser.dtype.pyarrow_dtype
if op_name == "count":
Expand All @@ -429,7 +432,7 @@ def check_reduce(self, ser, op_name, skipna):
tm.assert_almost_equal(result, expected)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
pa_dtype = data.dtype.pyarrow_dtype
opname = all_numeric_reductions

Expand Down Expand Up @@ -497,44 +500,10 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
"median",
}:
request.node.add_marker(xfail_mark)
super().test_reduce_series(data, all_numeric_reductions, skipna)

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
if op_name not in ["median", "var", "std"]:
cmp_dtype = arr.dtype
else:
cmp_dtype = "float64[pyarrow]"
elif op_name in ["median", "var", "std", "mean", "skew"]:
cmp_dtype = "float64[pyarrow]"
else:
cmp_dtype = {
"i": "int64[pyarrow]",
"u": "uint64[pyarrow]",
"f": "float64[pyarrow]",
}[arr.dtype.kind]
return cmp_dtype
super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
if op_name == "skew":
assert not hasattr(data, op_name)
return
return super().test_reduce_frame(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
def test_median_not_approximate(self, typ):
# GH 52679
result = pd.Series([1, 2], dtype=f"{typ}[pyarrow]").median()
assert result == 1.5


class TestBaseBooleanReduce(base.BaseBooleanReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(
def test_reduce_series_boolean(
self, data, all_boolean_reductions, skipna, na_value, request
):
pa_dtype = data.dtype.pyarrow_dtype
Expand Down Expand Up @@ -566,6 +535,38 @@ def test_reduce_series(
result = getattr(ser, op_name)(skipna=skipna)
assert result is (op_name == "any")

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
if op_name not in ["median", "var", "std"]:
cmp_dtype = arr.dtype
else:
cmp_dtype = "float64[pyarrow]"
elif op_name in ["median", "var", "std", "mean", "skew"]:
cmp_dtype = "float64[pyarrow]"
else:
cmp_dtype = {
"i": "int64[pyarrow]",
"u": "uint64[pyarrow]",
"f": "float64[pyarrow]",
}[arr.dtype.kind]
return cmp_dtype

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
    op_name = all_numeric_reductions
    if op_name != "skew":
        return super().test_reduce_frame(data, op_name, skipna)
    # For "skew" the inherited test is not run; only check that the array
    # exposes no attribute of that name.
    assert not hasattr(data, op_name)

@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
def test_median_not_approximate(self, typ):
    # GH 52679
    # The median of [1, 2] must compare equal to exactly 1.5.
    pyarrow_dtype = f"{typ}[pyarrow]"
    ser = pd.Series([1, 2], dtype=pyarrow_dtype)
    assert ser.median() == 1.5


class TestBaseGroupby(base.BaseGroupbyTests):
def test_in_numeric_groupby(self, data_for_grouping):
Expand Down
9 changes: 4 additions & 5 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,10 @@ def test_groupby_sum_mincount(self, data_for_grouping, min_count):
tm.assert_frame_equal(result, expected)


class TestNumericReduce(base.BaseNumericReduceTests):
class TestReduce(base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
return True

def check_reduce(self, s, op_name, skipna):
if op_name == "count":
result = getattr(s, op_name)()
Expand Down Expand Up @@ -248,10 +251,6 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
return cmp_dtype


class TestBooleanReduce(base.BaseBooleanReduceTests):
pass


class TestPrinting(base.BasePrintingTests):
pass

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ class TestMissing(base.BaseMissingTests):
pass


class TestReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
pass


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests):
pass


class TestReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/extension/test_masked_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,12 @@ class TestGroupby(base.BaseGroupbyTests):
pass


class TestNumericReduce(base.BaseNumericReduceTests):
class TestReduce(base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
if op_name in ["any", "all"]:
pytest.skip(reason="Tested in tests/reductions/test_reductions.py")
return True

def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
# overwrite to ensure pd.NA is tested instead of np.nan
# https://github.com/pandas-dev/pandas/issues/30958
Expand Down Expand Up @@ -266,11 +271,6 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
return cmp_dtype


@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")
class TestBooleanReduce(base.BaseBooleanReduceTests):
pass


class TestAccumulation(base.BaseAccumulateTests):
def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
return True
Expand Down
26 changes: 14 additions & 12 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,27 +303,29 @@ class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
pass


class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
class TestReduce(BaseNumPyTests, base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
if tm.get_dtype(obj).kind == "O":
return op_name in ["sum", "min", "max", "any", "all"]
return True

def check_reduce(self, s, op_name, skipna):
result = getattr(s, op_name)(skipna=skipna)
res_op = getattr(s, op_name)
# avoid coercing int -> float. Just cast to the actual numpy type.
expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna)
exp_op = getattr(s.astype(s.dtype._dtype), op_name)
if op_name == "count":
result = res_op()
expected = exp_op()
else:
result = res_op(skipna=skipna)
expected = exp_op(skipna=skipna)
tm.assert_almost_equal(result, expected)

@pytest.mark.skip("tests not written yet")
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
pass

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_boolean_reductions, skipna):
super().test_reduce_series(data, all_boolean_reductions, skipna)


@skip_nested
class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
pass


class TestMissing(BaseNumPyTests, base.BaseMissingTests):
@skip_nested
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_fillna_no_op_returns_copy(self, data):
tm.assert_extension_array_equal(result, data)


class TestNoReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
Expand Down