Skip to content

REF: de-duplicate check_reduce_frame #54393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import final
import warnings

import pytest
Expand All @@ -15,6 +16,9 @@ class BaseReduceTests(BaseExtensionTests):
"""

def check_reduce(self, s, op_name, skipna):
# We perform the same operation on the np.float64 data and check
# that the results match. Override if you need to cast to something
# other than float64.
res_op = getattr(s, op_name)
exp_op = getattr(s.astype("float64"), op_name)
if op_name == "count":
Expand All @@ -25,6 +29,43 @@ def check_reduce(self, s, op_name, skipna):
expected = exp_op(skipna=skipna)
tm.assert_almost_equal(result, expected)

def _get_expected_reduction_dtype(self, arr, op_name: str):
# Find the expected dtype when the given reduction is done on a DataFrame
# column with this array. The default assumes float64-like behavior,
# i.e. retains the dtype.
#
# arr: the array backing the column being reduced.
# op_name: name of the reduction; not consulted by this default implementation.
# Returns arr.dtype unchanged. check_reduce_frame builds its one-element
# expected array with the returned dtype, so override this method when a
# reduction is expected to produce a different dtype.
return arr.dtype

# We anticipate that authors should not need to override check_reduce_frame,
# but should be able to do any necessary overriding in
# _get_expected_reduction_dtype. If you have a use case where this
# does not hold, please let us know at github.com/pandas-dev/pandas/issues.
@final
def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
# Check that the 2D reduction done in a DataFrame reduction "looks like"
# a wrapped version of the 1D reduction done by Series.
#
# ser: Series whose backing array (ser.array) is reduced.
# op_name: name of the reduction, looked up as a method on the DataFrame
# and on the Series and passed to arr._reduce.
# skipna: forwarded to both the array-level and the DataFrame-level reduction.
# Mismatches surface through the tm.assert_extension_array_equal calls below.
arr = ser.array
df = pd.DataFrame({"a": arr})

# var/std get ddof passed explicitly; every other reduction gets no extra kwargs.
kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

# dtype used to build the expected one-element array; customized per
# extension type through _get_expected_reduction_dtype.
cmp_dtype = self._get_expected_reduction_dtype(arr, op_name)

# The DataFrame method just calls arr._reduce with keepdims=True,
# so this first check is perfunctory.
result1 = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
result2 = getattr(df, op_name)(skipna=skipna, **kwargs).array
tm.assert_extension_array_equal(result1, result2)

# Check that the 2D reduction looks like a wrapped version of the
# 1D reduction
if not skipna and ser.isna().any():
# With skipna=False and at least one missing value, a single pd.NA is expected.
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
# Otherwise the expected value is the Series reduction over the non-missing
# values. kwargs is not forwarded here, so this call relies on the Series
# method's default arguments.
exp_value = getattr(ser.dropna(), op_name)()
expected = pd.array([exp_value], dtype=cmp_dtype)

tm.assert_extension_array_equal(result1, expected)


class BaseNoReduceTests(BaseReduceTests):
"""we don't define any reductions"""
Expand Down Expand Up @@ -71,9 +112,12 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna):
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
s = pd.Series(data)
if not is_numeric_dtype(s):
if not is_numeric_dtype(s.dtype):
pytest.skip("not numeric dtype")

if op_name in ["count", "kurt", "sem"]:
pytest.skip(f"{op_name} not an array method")

self.check_reduce_frame(s, op_name, skipna)


Expand Down
30 changes: 8 additions & 22 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,27 +160,6 @@ def check_reduce(self, s, op_name, skipna):
expected = getattr(np.asarray(s), op_name)()
tm.assert_almost_equal(result, expected)

def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
# Compare the keepdims=True array reduction against the matching DataFrame
# reduction and against a DecimalArray wrapping the Series-level result.
arr = ser.array
df = pd.DataFrame({"a": arr})

# For these reductions, assert that the array has no attribute of that
# name and then skip; the assert fails instead of skipping if it does.
if op_name in ["count", "kurt", "sem", "skew", "median"]:
assert not hasattr(arr, op_name)
pytest.skip(f"{op_name} not an array method")

# result1: reduction done directly on the array; result2: the same
# reduction done through the single-column DataFrame.
result1 = arr._reduce(op_name, skipna=skipna, keepdims=True)
result2 = getattr(df, op_name)(skipna=skipna).array

tm.assert_extension_array_equal(result1, result2)

if not skipna and ser.isna().any():
# With skipna=False and at least one missing value, a single pd.NA is expected.
expected = DecimalArray([pd.NA])
else:
# Otherwise wrap the Series reduction over the non-missing values.
exp_value = getattr(ser.dropna(), op_name)()
expected = DecimalArray([exp_value])

tm.assert_extension_array_equal(result1, expected)

def test_reduction_without_keepdims(self):
# GH52788
# test _reduce without keepdims
Expand All @@ -205,7 +184,14 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):


class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
pass
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
# Run the inherited test_reduce_frame for every reduction except "skew"
# and "median", which are skipped here.
op_name = all_numeric_reductions
if op_name in ["skew", "median"]:
# The assert fails (instead of skipping) if the array ever exposes an
# attribute with this name.
assert not hasattr(data, op_name)
pytest.skip(f"{op_name} not an array method")

return super().test_reduce_frame(data, all_numeric_reductions, skipna)


class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
Expand Down
26 changes: 9 additions & 17 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,15 +499,7 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
request.node.add_marker(xfail_mark)
super().test_reduce_series(data, all_numeric_reductions, skipna)

def check_reduce_frame(self, ser, op_name, skipna):
arr = ser.array

if op_name in ["count", "kurt", "sem", "skew"]:
assert not hasattr(arr, op_name)
return

kwargs = {"ddof": 1} if op_name in ["var", "std"] else {}

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
Expand All @@ -523,15 +515,15 @@ def check_reduce_frame(self, ser, op_name, skipna):
"u": "uint64[pyarrow]",
"f": "float64[pyarrow]",
}[arr.dtype.kind]
result = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs)
return cmp_dtype

if not skipna and ser.isna().any():
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)(**kwargs)
expected = pd.array([exp_value], dtype=cmp_dtype)

tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
    # Run the inherited test_reduce_frame for every reduction except "skew".
    op_name = all_numeric_reductions
    if op_name == "skew":
        # The assert fails (instead of skipping) if the array ever exposes a
        # "skew" attribute.
        assert not hasattr(data, op_name)
        # Report a skip rather than returning silently, so this case is not
        # counted as a pass when nothing was actually checked.
        pytest.skip(f"{op_name} not an array method")
    return super().test_reduce_frame(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
def test_median_not_approximate(self, typ):
Expand Down
17 changes: 2 additions & 15 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,7 @@ def check_reduce(self, s, op_name, skipna):
expected = bool(expected)
tm.assert_almost_equal(result, expected)

def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
arr = ser.array

if op_name in ["count", "kurt", "sem"]:
assert not hasattr(arr, op_name)
pytest.skip(f"{op_name} not an array method")

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["mean", "median", "var", "std", "skew"]:
cmp_dtype = "Float64"
elif op_name in ["min", "max"]:
Expand All @@ -251,14 +245,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
cmp_dtype = "Int32" if is_windows_or_32bit else "Int64"
else:
raise TypeError("not supposed to reach this")

result = arr._reduce(op_name, skipna=skipna, keepdims=True)
if not skipna and ser.isna().any():
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)()
expected = pd.array([exp_value], dtype=cmp_dtype)
tm.assert_extension_array_equal(result, expected)
return cmp_dtype


class TestBooleanReduce(base.BaseBooleanReduceTests):
Expand Down
26 changes: 4 additions & 22 deletions pandas/tests/extension/test_masked_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
)
from pandas.tests.extension import base

is_windows_or_32bit = is_platform_windows() or not IS64

pytestmark = [
pytest.mark.filterwarnings(
"ignore:invalid value encountered in divide:RuntimeWarning"
Expand Down Expand Up @@ -246,16 +248,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
expected = pd.NA
tm.assert_almost_equal(result, expected)

def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
if op_name in ["count", "kurt", "sem"]:
assert not hasattr(ser.array, op_name)
pytest.skip(f"{op_name} not an array method")

arr = ser.array
df = pd.DataFrame({"a": arr})

is_windows_or_32bit = is_platform_windows() or not IS64

def _get_expected_reduction_dtype(self, arr, op_name: str):
if tm.is_float_dtype(arr.dtype):
cmp_dtype = arr.dtype.name
elif op_name in ["mean", "median", "var", "std", "skew"]:
Expand All @@ -270,18 +263,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64"
else:
raise TypeError("not supposed to reach this")

if not skipna and ser.isna().any():
expected = pd.array([pd.NA], dtype=cmp_dtype)
else:
exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)()
expected = pd.array([exp_value], dtype=cmp_dtype)

result1 = arr._reduce(op_name, skipna=skipna, keepdims=True)
result2 = getattr(df, op_name)(skipna=skipna).array

tm.assert_extension_array_equal(result1, result2)
tm.assert_extension_array_equal(result2, expected)
return cmp_dtype


@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,8 @@ def check_reduce(self, s, op_name, skipna):
tm.assert_almost_equal(result, expected)

@pytest.mark.skip("tests not written yet")
def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
pass

@pytest.mark.parametrize("skipna", [True, False])
Expand Down