diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 20e99d007c798..94c2aa4ae24d7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -645,7 +645,7 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`) - Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`) -- +- Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`) Conversion ^^^^^^^^^^ diff --git a/pandas/conftest.py b/pandas/conftest.py index 30ff8306a03b2..634b86659b22d 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1034,6 +1034,7 @@ def all_arithmetic_functions(request): _all_numeric_reductions = [ + "count", "sum", "max", "min", @@ -1044,6 +1045,7 @@ def all_arithmetic_functions(request): "median", "kurt", "skew", + "sem", ] diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 8d34ce1d29817..ed99f12aaad5f 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -829,13 +829,9 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): """ if name == "sem": - def pyarrow_meth(data, skipna, **kwargs): - numerator = pc.stddev(data, skip_nulls=skipna, **kwargs) - denominator = pc.sqrt_checked( - pc.subtract_checked( - pc.count(self._data, skip_nulls=skipna), kwargs["ddof"] - ) - ) + def pyarrow_meth(data, skip_nulls, **kwargs): + numerator = pc.stddev(data, skip_nulls=skip_nulls, **kwargs) + denominator = pc.sqrt_checked(pc.count(self._data)) return pc.divide_checked(numerator, denominator) else: diff --git a/pandas/tests/arrays/boolean/test_reduction.py b/pandas/tests/arrays/boolean/test_reduction.py index f3807df929f9a..dd8c3eda9ed05 100644 --- a/pandas/tests/arrays/boolean/test_reduction.py +++ b/pandas/tests/arrays/boolean/test_reduction.py @@ -50,10 +50,11 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions): if dropna: s = s.dropna() - if op == "sum": - assert isinstance(getattr(s, op)(), np.int_) - elif op == "prod": + if op in ("sum", "prod"): assert isinstance(getattr(s, op)(), np.int_) + elif op == "count": + # Oddly on the 32 bit build (but not Windows), this is intc (!= intp) + assert isinstance(getattr(s, op)(), np.integer) elif op in ("min", "max"): assert isinstance(getattr(s, op)(), np.bool_) else: diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index e363fda650d52..cf161a7f4b906 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -14,8 +14,14 @@ class BaseReduceTests(BaseExtensionTests): """ def check_reduce(self, s, op_name, skipna): - result = getattr(s, op_name)(skipna=skipna) - expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + res_op = getattr(s, op_name) + exp_op = getattr(s.astype("float64"), op_name) + if op_name == "count": + result = res_op() + expected = exp_op() + else: + result = res_op(skipna=skipna) + expected = exp_op(skipna=skipna) tm.assert_almost_equal(result, expected) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index acba1bd557351..a49f723ea7a92 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -112,11 +112,14 @@ class TestMissing(base.BaseMissingTests): class Reduce: def check_reduce(self, s, op_name, skipna): - if op_name in ["median", "skew", "kurt"]: + if op_name in ["median", "skew", "kurt", "sem"]: msg = r"decimal does not support the .* operation" with pytest.raises(NotImplementedError, match=msg): getattr(s, op_name)(skipna=skipna) - + elif op_name == "count": + result = getattr(s, op_name)() + expected = len(s) - s.isna().sum() + tm.assert_almost_equal(result, expected) else: result = getattr(s, op_name)(skipna=skipna) expected = getattr(np.asarray(s), op_name)() diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index ef5060265a0b4..e6f1675bb8bc8 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -346,7 +346,10 @@ def test_getitem_scalar(self, data): class TestBaseNumericReduce(base.BaseNumericReduceTests): def check_reduce(self, ser, op_name, skipna): pa_dtype = ser.dtype.pyarrow_dtype - result = getattr(ser, op_name)(skipna=skipna) + if op_name == "count": + result = getattr(ser, op_name)() + else: + result = getattr(ser, op_name)(skipna=skipna) if pa.types.is_boolean(pa_dtype): # Can't convert if ser contains NA pytest.skip( @@ -354,7 +357,10 @@ def check_reduce(self, ser, op_name, skipna): ) elif pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): ser = ser.astype("Float64") - expected = getattr(ser, op_name)(skipna=skipna) + if op_name == "count": + expected = getattr(ser, op_name)() + else: + expected = getattr(ser, op_name)(skipna=skipna) tm.assert_almost_equal(result, expected) @pytest.mark.parametrize("skipna", [True, False]) @@ -374,6 +380,8 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request): and pa_version_under6p0 ): request.node.add_marker(xfail_mark) + elif all_numeric_reductions == "sem" and pa_version_under8p0: + request.node.add_marker(xfail_mark) elif ( all_numeric_reductions in {"sum", "mean"} and skipna is False @@ -389,20 +397,28 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request): ), ) ) - elif not ( - pa.types.is_integer(pa_dtype) - or pa.types.is_floating(pa_dtype) - or pa.types.is_boolean(pa_dtype) - ) and not ( - all_numeric_reductions in {"min", "max"} - and ( - (pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype)) - or pa.types.is_string(pa_dtype) - or pa.types.is_binary(pa_dtype) + elif ( + not ( + pa.types.is_integer(pa_dtype) + or pa.types.is_floating(pa_dtype) + or pa.types.is_boolean(pa_dtype) + ) + and not ( + all_numeric_reductions in {"min", "max"} + and ( + ( + pa.types.is_temporal(pa_dtype) + and not pa.types.is_duration(pa_dtype) + ) + or pa.types.is_string(pa_dtype) + or pa.types.is_binary(pa_dtype) + ) ) + and not all_numeric_reductions == "count" ): request.node.add_marker(xfail_mark) elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { + "sem", "std", "var", "median", diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index b846028dab947..9646ade43e1d7 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -367,8 +367,12 @@ def test_groupby_sum_mincount(self, data_for_grouping, min_count): class TestNumericReduce(base.BaseNumericReduceTests): def check_reduce(self, s, op_name, skipna): - result = getattr(s, op_name)(skipna=skipna) - expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + if op_name == "count": + result = getattr(s, op_name)() + expected = getattr(s.astype("float64"), op_name)() + else: + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) # override parent function to cast to bool for min/max if np.isnan(expected): expected = pd.NA diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index 0d88822009a90..580ab743a9d93 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -188,13 +188,16 @@ class TestNumericReduce(base.BaseNumericReduceTests): def check_reduce(self, s, op_name, skipna): # overwrite to ensure pd.NA is tested instead of np.nan # https://github.com/pandas-dev/pandas/issues/30958 - result = getattr(s, op_name)(skipna=skipna) - if not skipna and s.isna().any(): - expected = pd.NA + if op_name == "count": + result = getattr(s, op_name)() + expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)() else: + result = getattr(s, op_name)(skipna=skipna) expected = getattr(s.dropna().astype(s.dtype.numpy_dtype), op_name)( skipna=skipna ) + if not skipna and s.isna().any(): + expected = pd.NA tm.assert_almost_equal(result, expected) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index a6cf820dc7609..ba6daf4f2e189 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -211,11 +211,14 @@ class TestNumericReduce(base.BaseNumericReduceTests): def check_reduce(self, s, op_name, skipna): # overwrite to ensure pd.NA is tested instead of np.nan # https://github.com/pandas-dev/pandas/issues/30958 - result = getattr(s, op_name)(skipna=skipna) - if not skipna and s.isna().any(): - expected = pd.NA + if op_name == "count": + result = getattr(s, op_name)() + expected = getattr(s.dropna().astype("int64"), op_name)() else: + result = getattr(s, op_name)(skipna=skipna) expected = getattr(s.dropna().astype("int64"), op_name)(skipna=skipna) + if not skipna and s.isna().any(): + expected = pd.NA tm.assert_almost_equal(result, expected) diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index 2cfa295d939a8..97cf75acbd629 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -259,25 +259,3 @@ def frame_of_index_cols(): } ) return df - - -@pytest.fixture( - params=[ - "any", - "all", - "count", - "sum", - "prod", - "max", - "min", - "mean", - "median", - "skew", - "kurt", - "sem", - "var", - "std", - ] -) -def reduction_functions(request): - return request.param diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index e420274e7fd82..6c6a923e363ae 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1427,16 +1427,16 @@ def test_frame_any_with_timedelta(self): tm.assert_series_equal(result, expected) def test_reductions_skipna_none_raises( - self, request, frame_or_series, reduction_functions + self, request, frame_or_series, all_reductions ): - if reduction_functions == "count": + if all_reductions == "count": request.node.add_marker( pytest.mark.xfail(reason="Count does not accept skipna") ) obj = frame_or_series([1, 2, 3]) msg = 'For argument "skipna" expected type bool, received type NoneType.' with pytest.raises(ValueError, match=msg): - getattr(obj, reduction_functions)(skipna=None) + getattr(obj, all_reductions)(skipna=None) class TestNuisanceColumns: