BUG/TST: Include sem & count in all_numeric_reductions (#49759)

mroeschke · web-flow · commit 8b227f39cecc · 2022-11-28T14:55:48.000-08:00
* CLN: Fixture reduction

* BUG/TST: Include sem & count in all_numeric_reductions

* Add xfails

* Make more generic, and fix whatsnew

* Fix comment typo
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -652,7 +652,7 @@ Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`)
 - Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`)
--
+- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
 
 Conversion
 ^^^^^^^^^^
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1034,6 +1034,7 @@ def all_arithmetic_functions(request):
 
 
 _all_numeric_reductions = [
+    "count",
     "sum",
     "max",
     "min",
@@ -1044,6 +1045,7 @@ def all_arithmetic_functions(request):
     "median",
     "kurt",
     "skew",
+    "sem",
 ]
 
 
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -842,13 +842,9 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         """
         if name == "sem":
 
-            def pyarrow_meth(data, skipna, **kwargs):
-                numerator = pc.stddev(data, skip_nulls=skipna, **kwargs)
-                denominator = pc.sqrt_checked(
-                    pc.subtract_checked(
-                        pc.count(self._data, skip_nulls=skipna), kwargs["ddof"]
-                    )
-                )
+            def pyarrow_meth(data, skip_nulls, **kwargs):
+                numerator = pc.stddev(data, skip_nulls=skip_nulls, **kwargs)
+                denominator = pc.sqrt_checked(pc.count(self._data))
                 return pc.divide_checked(numerator, denominator)
 
         else:
diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py
@@ -50,10 +50,11 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
     if dropna:
         s = s.dropna()
 
-    if op == "sum":
-        assert isinstance(getattr(s, op)(), np.int_)
-    elif op == "prod":
+    if op in ("sum", "prod"):
         assert isinstance(getattr(s, op)(), np.int_)
+    elif op == "count":
+        # Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
+        assert isinstance(getattr(s, op)(), np.integer)
     elif op in ("min", "max"):
         assert isinstance(getattr(s, op)(), np.bool_)
     else:
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
@@ -14,8 +14,14 @@ class BaseReduceTests(BaseExtensionTests):
     """
 
     def check_reduce(self, s, op_name, skipna):
-        result = getattr(s, op_name)(skipna=skipna)
-        expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
+        res_op = getattr(s, op_name)
+        exp_op = getattr(s.astype("float64"), op_name)
+        if op_name == "count":
+            result = res_op()
+            expected = exp_op()
+        else:
+            result = res_op(skipna=skipna)
+            expected = exp_op(skipna=skipna)
         tm.assert_almost_equal(result, expected)
 
 
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -112,11 +112,14 @@ class TestMissing(base.BaseMissingTests):
 class Reduce:
     def check_reduce(self, s, op_name, skipna):
 
-        if op_name in ["median", "skew", "kurt"]:
+        if op_name in ["median", "skew", "kurt", "sem"]:
             msg = r"decimal does not support the .* operation"
             with pytest.raises(NotImplementedError, match=msg):
                 getattr(s, op_name)(skipna=skipna)
-
+        elif op_name == "count":
+            result = getattr(s, op_name)()
+            expected = len(s) - s.isna().sum()
+            tm.assert_almost_equal(result, expected)
         else:
             result = getattr(s, op_name)(skipna=skipna)
             expected = getattr(np.asarray(s), op_name)()
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -346,15 +346,21 @@ def test_getitem_scalar(self, data):
 class TestBaseNumericReduce(base.BaseNumericReduceTests):
     def check_reduce(self, ser, op_name, skipna):
         pa_dtype = ser.dtype.pyarrow_dtype
-        result = getattr(ser, op_name)(skipna=skipna)
+        if op_name == "count":
+            result = getattr(ser, op_name)()
+        else:
+            result = getattr(ser, op_name)(skipna=skipna)
         if pa.types.is_boolean(pa_dtype):
             # Can't convert if ser contains NA
             pytest.skip(
                 "pandas boolean data with NA does not fully support all reductions"
             )
         elif pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype):
             ser = ser.astype("Float64")
-        expected = getattr(ser, op_name)(skipna=skipna)
+        if op_name == "count":
+            expected = getattr(ser, op_name)()
+        else:
+            expected = getattr(ser, op_name)(skipna=skipna)
         tm.assert_almost_equal(result, expected)
 
     @pytest.mark.parametrize("skipna", [True, False])
@@ -374,6 +380,8 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
             and pa_version_under6p0
         ):
             request.node.add_marker(xfail_mark)
+        elif all_numeric_reductions == "sem" and pa_version_under8p0:
+            request.node.add_marker(xfail_mark)
         elif (
             all_numeric_reductions in {"sum", "mean"}
             and skipna is False
@@ -389,20 +397,28 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
                     ),
                 )
             )
-        elif not (
-            pa.types.is_integer(pa_dtype)
-            or pa.types.is_floating(pa_dtype)
-            or pa.types.is_boolean(pa_dtype)
-        ) and not (
-            all_numeric_reductions in {"min", "max"}
-            and (
-                (pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype))
-                or pa.types.is_string(pa_dtype)
-                or pa.types.is_binary(pa_dtype)
+        elif (
+            not (
+                pa.types.is_integer(pa_dtype)
+                or pa.types.is_floating(pa_dtype)
+                or pa.types.is_boolean(pa_dtype)
+            )
+            and not (
+                all_numeric_reductions in {"min", "max"}
+                and (
+                    (
+                        pa.types.is_temporal(pa_dtype)
+                        and not pa.types.is_duration(pa_dtype)
+                    )
+                    or pa.types.is_string(pa_dtype)
+                    or pa.types.is_binary(pa_dtype)
+                )
             )
+            and not all_numeric_reductions == "count"
         ):
             request.node.add_marker(xfail_mark)
         elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in {
+            "sem",
             "std",
             "var",
             "median",
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -367,8 +367,12 @@ def test_groupby_sum_mincount(self, data_for_grouping, min_count):
 
 class TestNumericReduce(base.BaseNumericReduceTests):
     def check_reduce(self, s, op_name, skipna):
-        result = getattr(s, op_name)(skipna=skipna)
-        expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
+        if op_name == "count":
+            result = getattr(s, op_name)()
+            expected = getattr(s.astype("float64"), op_name)()
+        else:
+            result = getattr(s, op_name)(skipna=skipna)
+            expected = getattr(s.astype("float64"), op_name)(skipna=skipna)
         # override parent function to cast to bool for min/max
         if np.isnan(expected):
             expected = pd.NA
diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py
@@ -188,13 +188,16 @@ class TestNumericReduce(base.BaseNumericReduceTests):
     def check_reduce(self, s, op_name, skipna):
         # overwrite to ensure pd.NA is tested instead of np.nan
         # https://github.com/pandas-dev/pandas/issues/30958
-        result = getattr(s, op_name)(skipna=skipna)
-        if not skipna and s.isna().any():
-            expected = pd.NA
+        if op_name == "count":
+            result = getattr(s, op_name)()
+            expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)()
         else:
+            result = getattr(s, op_name)(skipna=skipna)
             expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)(
                 skipna=skipna
             )
+            if not skipna and s.isna().any():
+                expected = pd.NA
         tm.assert_almost_equal(result, expected)
 
 
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
@@ -211,11 +211,14 @@ class TestNumericReduce(base.BaseNumericReduceTests):
     def check_reduce(self, s, op_name, skipna):
         # overwrite to ensure pd.NA is tested instead of np.nan
         # https://github.com/pandas-dev/pandas/issues/30958
-        result = getattr(s, op_name)(skipna=skipna)
-        if not skipna and s.isna().any():
-            expected = pd.NA
+        if op_name == "count":
+            result = getattr(s, op_name)()
+            expected = getattr(s.dropna().astype("int64"), op_name)()
         else:
+            result = getattr(s, op_name)(skipna=skipna)
             expected = getattr(s.dropna().astype("int64"), op_name)(skipna=skipna)
+            if not skipna and s.isna().any():
+                expected = pd.NA
         tm.assert_almost_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
@@ -259,25 +259,3 @@ def frame_of_index_cols():
         }
     )
     return df
-
-
-@pytest.fixture(
-    params=[
-        "any",
-        "all",
-        "count",
-        "sum",
-        "prod",
-        "max",
-        "min",
-        "mean",
-        "median",
-        "skew",
-        "kurt",
-        "sem",
-        "var",
-        "std",
-    ]
-)
-def reduction_functions(request):
-    return request.param
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -1427,16 +1427,16 @@ def test_frame_any_with_timedelta(self):
         tm.assert_series_equal(result, expected)
 
     def test_reductions_skipna_none_raises(
-        self, request, frame_or_series, reduction_functions
+        self, request, frame_or_series, all_reductions
     ):
-        if reduction_functions == "count":
+        if all_reductions == "count":
             request.node.add_marker(
                 pytest.mark.xfail(reason="Count does not accept skipna")
             )
         obj = frame_or_series([1, 2, 3])
         msg = 'For argument "skipna" expected type bool, received type NoneType.'
         with pytest.raises(ValueError, match=msg):
-            getattr(obj, reduction_functions)(skipna=None)
+            getattr(obj, all_reductions)(skipna=None)
 
 
 class TestNuisanceColumns: