Skip to content

Commit 8b227f3

Browse files
authored
BUG/TST: Include sem & count in all_numeric_reductions (#49759)
* CLN: Fixture reduction * BUG/TST: Include sem & count in all_numeric_reductions * Add xfails * Make more generic, and fix whatsnew * Fix comment typo
1 parent dd13032 commit 8b227f3

File tree

12 files changed

+72
-60
lines changed

12 files changed

+72
-60
lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ Numeric
652652
^^^^^^^
653653
- Bug in :meth:`DataFrame.add` where a ufunc could not be applied when the inputs contained a mix of DataFrame and Series types (:issue:`39853`)
654654
- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` where the result would not be coerced to float (:issue:`49551`)
655-
-
655+
- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where a ``TypeError`` was always erroneously raised when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
656656

657657
Conversion
658658
^^^^^^^^^^

pandas/conftest.py

+2
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,7 @@ def all_arithmetic_functions(request):
10341034

10351035

10361036
_all_numeric_reductions = [
1037+
"count",
10371038
"sum",
10381039
"max",
10391040
"min",
@@ -1044,6 +1045,7 @@ def all_arithmetic_functions(request):
10441045
"median",
10451046
"kurt",
10461047
"skew",
1048+
"sem",
10471049
]
10481050

10491051

pandas/core/arrays/arrow/array.py

+3-7
Original file line numberDiff line numberDiff line change
@@ -842,13 +842,9 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
842842
"""
843843
if name == "sem":
844844

845-
def pyarrow_meth(data, skipna, **kwargs):
846-
numerator = pc.stddev(data, skip_nulls=skipna, **kwargs)
847-
denominator = pc.sqrt_checked(
848-
pc.subtract_checked(
849-
pc.count(self._data, skip_nulls=skipna), kwargs["ddof"]
850-
)
851-
)
845+
def pyarrow_meth(data, skip_nulls, **kwargs):
846+
numerator = pc.stddev(data, skip_nulls=skip_nulls, **kwargs)
847+
denominator = pc.sqrt_checked(pc.count(self._data))
852848
return pc.divide_checked(numerator, denominator)
853849

854850
else:

pandas/tests/arrays/boolean/test_reduction.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,11 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
5050
if dropna:
5151
s = s.dropna()
5252

53-
if op == "sum":
54-
assert isinstance(getattr(s, op)(), np.int_)
55-
elif op == "prod":
53+
if op in ("sum", "prod"):
5654
assert isinstance(getattr(s, op)(), np.int_)
55+
elif op == "count":
56+
# Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
57+
assert isinstance(getattr(s, op)(), np.integer)
5758
elif op in ("min", "max"):
5859
assert isinstance(getattr(s, op)(), np.bool_)
5960
else:

pandas/tests/extension/base/reduce.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,14 @@ class BaseReduceTests(BaseExtensionTests):
1414
"""
1515

1616
def check_reduce(self, s, op_name, skipna):
17-
result = getattr(s, op_name)(skipna=skipna)
18-
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
17+
res_op = getattr(s, op_name)
18+
exp_op = getattr(s.astype("float64"), op_name)
19+
if op_name == "count":
20+
result = res_op()
21+
expected = exp_op()
22+
else:
23+
result = res_op(skipna=skipna)
24+
expected = exp_op(skipna=skipna)
1925
tm.assert_almost_equal(result, expected)
2026

2127

pandas/tests/extension/decimal/test_decimal.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,14 @@ class TestMissing(base.BaseMissingTests):
112112
class Reduce:
113113
def check_reduce(self, s, op_name, skipna):
114114

115-
if op_name in ["median", "skew", "kurt"]:
115+
if op_name in ["median", "skew", "kurt", "sem"]:
116116
msg = r"decimal does not support the .* operation"
117117
with pytest.raises(NotImplementedError, match=msg):
118118
getattr(s, op_name)(skipna=skipna)
119-
119+
elif op_name == "count":
120+
result = getattr(s, op_name)()
121+
expected = len(s) - s.isna().sum()
122+
tm.assert_almost_equal(result, expected)
120123
else:
121124
result = getattr(s, op_name)(skipna=skipna)
122125
expected = getattr(np.asarray(s), op_name)()

pandas/tests/extension/test_arrow.py

+28-12
Original file line numberDiff line numberDiff line change
@@ -346,15 +346,21 @@ def test_getitem_scalar(self, data):
346346
class TestBaseNumericReduce(base.BaseNumericReduceTests):
347347
def check_reduce(self, ser, op_name, skipna):
348348
pa_dtype = ser.dtype.pyarrow_dtype
349-
result = getattr(ser, op_name)(skipna=skipna)
349+
if op_name == "count":
350+
result = getattr(ser, op_name)()
351+
else:
352+
result = getattr(ser, op_name)(skipna=skipna)
350353
if pa.types.is_boolean(pa_dtype):
351354
# Can't convert if ser contains NA
352355
pytest.skip(
353356
"pandas boolean data with NA does not fully support all reductions"
354357
)
355358
elif pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype):
356359
ser = ser.astype("Float64")
357-
expected = getattr(ser, op_name)(skipna=skipna)
360+
if op_name == "count":
361+
expected = getattr(ser, op_name)()
362+
else:
363+
expected = getattr(ser, op_name)(skipna=skipna)
358364
tm.assert_almost_equal(result, expected)
359365

360366
@pytest.mark.parametrize("skipna", [True, False])
@@ -374,6 +380,8 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
374380
and pa_version_under6p0
375381
):
376382
request.node.add_marker(xfail_mark)
383+
elif all_numeric_reductions == "sem" and pa_version_under8p0:
384+
request.node.add_marker(xfail_mark)
377385
elif (
378386
all_numeric_reductions in {"sum", "mean"}
379387
and skipna is False
@@ -389,20 +397,28 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
389397
),
390398
)
391399
)
392-
elif not (
393-
pa.types.is_integer(pa_dtype)
394-
or pa.types.is_floating(pa_dtype)
395-
or pa.types.is_boolean(pa_dtype)
396-
) and not (
397-
all_numeric_reductions in {"min", "max"}
398-
and (
399-
(pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype))
400-
or pa.types.is_string(pa_dtype)
401-
or pa.types.is_binary(pa_dtype)
400+
elif (
401+
not (
402+
pa.types.is_integer(pa_dtype)
403+
or pa.types.is_floating(pa_dtype)
404+
or pa.types.is_boolean(pa_dtype)
405+
)
406+
and not (
407+
all_numeric_reductions in {"min", "max"}
408+
and (
409+
(
410+
pa.types.is_temporal(pa_dtype)
411+
and not pa.types.is_duration(pa_dtype)
412+
)
413+
or pa.types.is_string(pa_dtype)
414+
or pa.types.is_binary(pa_dtype)
415+
)
402416
)
417+
and not all_numeric_reductions == "count"
403418
):
404419
request.node.add_marker(xfail_mark)
405420
elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in {
421+
"sem",
406422
"std",
407423
"var",
408424
"median",

pandas/tests/extension/test_boolean.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -367,8 +367,12 @@ def test_groupby_sum_mincount(self, data_for_grouping, min_count):
367367

368368
class TestNumericReduce(base.BaseNumericReduceTests):
369369
def check_reduce(self, s, op_name, skipna):
370-
result = getattr(s, op_name)(skipna=skipna)
371-
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
370+
if op_name == "count":
371+
result = getattr(s, op_name)()
372+
expected = getattr(s.astype("float64"), op_name)()
373+
else:
374+
result = getattr(s, op_name)(skipna=skipna)
375+
expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
372376
# override parent function to cast to bool for min/max
373377
if np.isnan(expected):
374378
expected = pd.NA

pandas/tests/extension/test_floating.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,16 @@ class TestNumericReduce(base.BaseNumericReduceTests):
188188
def check_reduce(self, s, op_name, skipna):
189189
# overwrite to ensure pd.NA is tested instead of np.nan
190190
# https://github.com/pandas-dev/pandas/issues/30958
191-
result = getattr(s, op_name)(skipna=skipna)
192-
if not skipna and s.isna().any():
193-
expected = pd.NA
191+
if op_name == "count":
192+
result = getattr(s, op_name)()
193+
expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)()
194194
else:
195+
result = getattr(s, op_name)(skipna=skipna)
195196
expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)(
196197
skipna=skipna
197198
)
199+
if not skipna and s.isna().any():
200+
expected = pd.NA
198201
tm.assert_almost_equal(result, expected)
199202

200203

pandas/tests/extension/test_integer.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -211,11 +211,14 @@ class TestNumericReduce(base.BaseNumericReduceTests):
211211
def check_reduce(self, s, op_name, skipna):
212212
# overwrite to ensure pd.NA is tested instead of np.nan
213213
# https://github.com/pandas-dev/pandas/issues/30958
214-
result = getattr(s, op_name)(skipna=skipna)
215-
if not skipna and s.isna().any():
216-
expected = pd.NA
214+
if op_name == "count":
215+
result = getattr(s, op_name)()
216+
expected = getattr(s.dropna().astype("int64"), op_name)()
217217
else:
218+
result = getattr(s, op_name)(skipna=skipna)
218219
expected = getattr(s.dropna().astype("int64"), op_name)(skipna=skipna)
220+
if not skipna and s.isna().any():
221+
expected = pd.NA
219222
tm.assert_almost_equal(result, expected)
220223

221224

pandas/tests/frame/conftest.py

-22
Original file line numberDiff line numberDiff line change
@@ -259,25 +259,3 @@ def frame_of_index_cols():
259259
}
260260
)
261261
return df
262-
263-
264-
@pytest.fixture(
265-
params=[
266-
"any",
267-
"all",
268-
"count",
269-
"sum",
270-
"prod",
271-
"max",
272-
"min",
273-
"mean",
274-
"median",
275-
"skew",
276-
"kurt",
277-
"sem",
278-
"var",
279-
"std",
280-
]
281-
)
282-
def reduction_functions(request):
283-
return request.param

pandas/tests/frame/test_reductions.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1427,16 +1427,16 @@ def test_frame_any_with_timedelta(self):
14271427
tm.assert_series_equal(result, expected)
14281428

14291429
def test_reductions_skipna_none_raises(
1430-
self, request, frame_or_series, reduction_functions
1430+
self, request, frame_or_series, all_reductions
14311431
):
1432-
if reduction_functions == "count":
1432+
if all_reductions == "count":
14331433
request.node.add_marker(
14341434
pytest.mark.xfail(reason="Count does not accept skipna")
14351435
)
14361436
obj = frame_or_series([1, 2, 3])
14371437
msg = 'For argument "skipna" expected type bool, received type NoneType.'
14381438
with pytest.raises(ValueError, match=msg):
1439-
getattr(obj, reduction_functions)(skipna=None)
1439+
getattr(obj, all_reductions)(skipna=None)
14401440

14411441

14421442
class TestNuisanceColumns:

0 commit comments

Comments
 (0)