From 23ed323e10bd57f67876a3f29fecee3be8e01a4c Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 5 Nov 2022 07:21:29 -0400 Subject: [PATCH 1/8] WIP --- pandas/core/frame.py | 52 ++-- pandas/core/generic.py | 2 +- pandas/tests/apply/test_frame_apply.py | 18 +- pandas/tests/frame/methods/test_quantile.py | 16 +- pandas/tests/frame/test_reductions.py | 296 +++++++++----------- pandas/tests/groupby/test_apply.py | 7 +- pandas/tests/groupby/test_categorical.py | 2 +- 7 files changed, 166 insertions(+), 227 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 58859054943b3..8c56d7e9d1c9b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10739,7 +10739,7 @@ def _get_data() -> DataFrame: # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager.reduce - res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) + res, _ = df._mgr.reduce(blk_func, ignore_failures=False) out = df._constructor(res).iloc[0] if out_dtype is not None: out = out.astype(out_dtype) @@ -10761,31 +10761,31 @@ def _get_data() -> DataFrame: data = self values = data.values - try: - result = func(values) - - except TypeError: - # e.g. in nanops trying to convert strs to float - - data = _get_data() - labels = data._get_agg_axis(axis) - - values = data.values - with np.errstate(all="ignore"): - result = func(values) - - # columns have been dropped GH#41480 - arg_name = "numeric_only" - if name in ["all", "any"]: - arg_name = "bool_only" - warnings.warn( - "Dropping of nuisance columns in DataFrame reductions " - f"(with '{arg_name}=None') is deprecated; in a future " - "version this will raise TypeError. Select only valid " - "columns before calling the reduction.", - FutureWarning, - stacklevel=find_stack_level(), - ) + # try: + result = func(values) + + # except TypeError: + # # e.g. in nanops trying to convert strs to float + # + # data = _get_data() + # labels = data._get_agg_axis(axis) + # + # values = data.values + # with np.errstate(all="ignore"): + # result = func(values) + # + # # columns have been dropped GH#41480 + # arg_name = "numeric_only" + # if name in ["all", "any"]: + # arg_name = "bool_only" + # warnings.warn( + # "Dropping of nuisance columns in DataFrame reductions " + # f"(with '{arg_name}=None') is deprecated; in a future " + # "version this will raise TypeError. 
Select only valid " + # "columns before calling the reduction.", + # FutureWarning, + # stacklevel=find_stack_level(), + # ) if hasattr(result, "dtype"): if filter_type == "bool" and notna(result).all(): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f88fe35360074..4c930a6d9235c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11549,7 +11549,7 @@ def mean( axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only=False, **kwargs, ): return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index a1802a2fcb674..3e54c06ae8ed8 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1283,8 +1283,11 @@ def test_nuiscance_columns(): ) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.agg("sum") + msg = "DataFrame constructor called with incompatible data and dtype" + with pytest.raises(TypeError, match=msg): + df.agg("sum") + + result = df[["A", "B", "C"]].agg("sum") expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) @@ -1426,17 +1429,6 @@ def test_apply_datetime_tz_issue(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})]) -@pytest.mark.parametrize("method", ["min", "max", "sum"]) -def test_consistency_of_aggregates_of_columns_with_missing_values(df, method): - # GH 16832 - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - none_in_first_column_result = getattr(df[["A", "B"]], method)() - none_in_second_column_result = getattr(df[["B", "A"]], method)() - - tm.assert_series_equal(none_in_first_column_result, none_in_second_column_result) - - @pytest.mark.parametrize("col", [1, 1.0, True, "a", np.nan]) def test_apply_dtype(col): # GH 31466 diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 3beb201bcfa05..3b2b291130163 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -133,21 +133,11 @@ def test_empty(self, interp_method): ) assert np.isnan(q["x"]) and np.isnan(q["y"]) - def test_non_numeric_exclusion(self, interp_method, request, using_array_manager): + def test_non_numeric(self, interp_method, request, using_array_manager): interpolation, method = interp_method df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - rs = df.quantile( - 0.5, numeric_only=True, interpolation=interpolation, method=method - ) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - xp = df.median().rename(0.5) - if interpolation == "nearest": - xp = (xp + 0.5).astype(np.int64) - if method == "table" and using_array_manager: - request.node.add_marker( - pytest.mark.xfail(reason="Axis name incorrectly set.") - ) - tm.assert_series_equal(rs, xp) + with pytest.raises(TypeError, match="could not convert"): + df.median().rename(0.5) def test_axis(self, interp_method, request, using_array_manager): # axis diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 8d4d705296f35..5071a1b7dbdc0 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -169,15 +169,23 @@ class TestDataFrameAnalytics: ], ) def 
test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname): - if opname in ["sum", "min", "max"] and axis == 0: - warn = None - elif opname not in ["count", "nunique"]: - warn = FutureWarning - else: - warn = None - msg = "nuisance columns|default value of numeric_only" - with tm.assert_produces_warning(warn, match=msg): + if (opname in ("sum", "min", "max") and axis == 0) or opname in ( + "count", + "nunique", + ): getattr(float_string_frame, opname)(axis=axis) + else: + msg = "|".join( + [ + "Could not convert", + "could not convert", + "can't multiply sequence by non-int", + "unsupported operand type", + "not supported between instances of", + ] + ) + with pytest.raises(TypeError, match=msg): + print(getattr(float_string_frame, opname)(axis=axis)) if opname != "nunique": getattr(float_string_frame, opname)(axis=axis, numeric_only=True) @@ -337,18 +345,26 @@ def test_mixed_ops(self, op): "str": ["a", "b", "c", "d"], } ) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, op)() - assert len(result) == 2 + msg = "|".join( + [ + "Could not convert", + "could not convert", + "can't multiply sequence by non-int", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(df, op)() with pd.option_context("use_bottleneck", False): - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, op)() - assert len(result) == 2 + msg = "|".join( + [ + "Could not convert", + "could not convert", + "can't multiply sequence by non-int", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(df, op)() def test_reduce_mixed_frame(self): # GH 6806 @@ -416,10 +432,9 @@ def test_mean_mixed_string_decimal(self): df = DataFrame(d) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.mean() + with pytest.raises(TypeError, match="unsupported operand type"): + df.mean() + result = df[["A", "C"]].mean() expected = Series([2.7, 681.6], index=["A", "C"]) tm.assert_series_equal(result, expected) @@ -648,9 +663,8 @@ def test_operators_timedelta64(self): ) tm.assert_series_equal(result, expected) - # excludes numeric - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = mixed.min(axis=1) + # excludes non-numeric + result = mixed.min(axis=1, numeric_only=True) expected = Series([1, 1, 1.0], index=[0, 1, 2]) tm.assert_series_equal(result, expected) @@ -819,25 +833,17 @@ def test_sum_mixed_datetime(self): df = DataFrame({"A": date_range("2000", periods=4), "B": [1, 2, 3, 4]}).reindex( [2, 3, 4] ) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.sum() - - expected = Series({"B": 7.0}) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction 'sum'"): + df.sum() def test_mean_corner(self, float_frame, float_string_frame): # unit test when have object data - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - the_mean = float_string_frame.mean(axis=0) - the_sum = float_string_frame.sum(axis=0, numeric_only=True) - tm.assert_index_equal(the_sum.index, the_mean.index) - assert len(the_mean.index) < len(float_string_frame.columns) + with pytest.raises(TypeError, match="Could not convert"): + float_string_frame.mean(axis=0) # xs sum mixed type, just want to know it works... 
- with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - the_mean = float_string_frame.mean(axis=1) - the_sum = float_string_frame.sum(axis=1, numeric_only=True) - tm.assert_index_equal(the_sum.index, the_mean.index) + with pytest.raises(TypeError, match="unsupported operand type"): + float_string_frame.mean(axis=1) # take mean of boolean column float_frame["bool"] = float_frame["A"] > 0 @@ -861,10 +867,8 @@ def test_mean_datetimelike(self): expected = Series({"A": 1.0}) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.mean() - expected = Series({"A": 1.0, "B": df.loc[1, "B"], "C": df.loc[1, "C"]}) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="mean is not implemented for PeriodArray"): + df.mean() def test_mean_datetimelike_numeric_only_false(self): df = DataFrame( @@ -895,13 +899,13 @@ def test_mean_extensionarray_numeric_only_true(self): tm.assert_series_equal(result, expected) def test_stats_mixed_type(self, float_string_frame): - # don't blow up - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): + with pytest.raises(TypeError, match="could not convert"): float_string_frame.std(1) + with pytest.raises(TypeError, match="could not convert"): float_string_frame.var(1) + with pytest.raises(TypeError, match="unsupported operand type"): float_string_frame.mean(1) + with pytest.raises(TypeError, match="could not convert"): float_string_frame.skew(1) def test_sum_bools(self): @@ -1250,24 +1254,26 @@ def test_any_all_np_func(self, func, data, expected): # GH 19976 data = DataFrame(data) - warn = None if any(is_categorical_dtype(x) for x in data.dtypes): - warn = FutureWarning + with pytest.raises( + TypeError, match="dtype category does not support reduction" + ): + func(data) - with tm.assert_produces_warning( - warn, match="Select only valid columns", check_stacklevel=False - ): + # method version + with pytest.raises( + TypeError, match="dtype category does not support reduction" + ): + getattr(DataFrame(data), func.__name__)(axis=None) + else: result = func(data) - assert isinstance(result, np.bool_) - assert result.item() is expected + assert isinstance(result, np.bool_) + assert result.item() is expected - # method version - with tm.assert_produces_warning( - warn, match="Select only valid columns", check_stacklevel=False - ): + # method version result = getattr(DataFrame(data), func.__name__)(axis=None) - assert isinstance(result, np.bool_) - assert result.item() is expected + assert isinstance(result, np.bool_) + assert result.item() is expected def test_any_all_object(self): # GH 19976 @@ -1512,20 +1518,11 @@ def test_any_all_categorical_dtype_nuisance_column(self, method): with pytest.raises(TypeError, match="does not support reduction"): getattr(df, method)(bool_only=False) - # With bool_only=None, operating on this column raises and is ignored, - # so we expect an empty result. 
- with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, method)(bool_only=None) - expected = Series([], index=Index([]), dtype=bool) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + getattr(df, method)(bool_only=None) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns", check_stacklevel=False - ): - result = getattr(np, method)(df, axis=0) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + getattr(np, method)(df, axis=0) def test_median_categorical_dtype_nuisance_column(self): # GH#21020 DataFrame.median should match Series.median @@ -1539,12 +1536,8 @@ def test_median_categorical_dtype_nuisance_column(self): with pytest.raises(TypeError, match="does not support reduction"): df.median(numeric_only=False) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.median() - expected = Series([], index=Index([]), dtype=np.float64) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + df.median() # same thing, but with an additional non-categorical column df["B"] = df["A"].astype(int) @@ -1552,12 +1545,8 @@ def test_median_categorical_dtype_nuisance_column(self): with pytest.raises(TypeError, match="does not support reduction"): df.median(numeric_only=False) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.median() - expected = Series([2.0], index=["B"]) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + df.median() # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead # of expected.values @@ -1579,58 +1568,19 @@ def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): with pytest.raises(TypeError, match="is not ordered for operation"): getattr(df, method)(numeric_only=False) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, method)() - expected = Series([], index=Index([]), dtype=np.float64) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(df, method)() - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns", check_stacklevel=False - ): - result = getattr(np, method)(df) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(np, method)(df) # same thing, but with an additional non-categorical column df["B"] = df["A"].astype(object) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, method)() - if method == "min": - expected = Series(["a"], index=["B"]) - else: - expected = Series(["c"], index=["B"]) - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns", check_stacklevel=False - ): - result = getattr(np, method)(df) - tm.assert_series_equal(result, expected) - - def test_reduction_object_block_splits_nuisance_columns(self): - # GH#37827 - df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object) - - # We should only exclude "B", not "A" - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = 
df.mean() - expected = Series([1.0], index=["A"]) - tm.assert_series_equal(result, expected) - - # Same behavior but heterogeneous dtype - df["C"] = df["A"].astype(int) + 4 + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(df, method)() - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.mean() - expected = Series([1.0, 5.0], index=["A", "C"]) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(np, method)(df) def test_sum_timedelta64_skipna_false(using_array_manager, request): @@ -1710,12 +1660,8 @@ def test_groupby_regular_arithmetic_equivalent(meth): def test_frame_mixed_numeric_object_with_timestamp(ts_value): # GH 13912 df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]}) - with tm.assert_produces_warning( - FutureWarning, match="The default value of numeric_only" - ): - result = df.sum() - expected = Series([1, 1.1, "foo"], index=list("abc")) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + df.sum() def test_prod_sum_min_count_mixed_object(): @@ -1755,18 +1701,46 @@ def test_reduction_axis_none_deprecation(method): "corrwith", "count", "cov", + "mode", + "quantile", + ], +) +def test_numeric_only_deprecation(kernel): + # GH#46852 + df = DataFrame({"a": [1, 2, 3], "b": object}) + args = (df,) if kernel == "corrwith" else () + signature = inspect.signature(getattr(DataFrame, kernel)) + default = signature.parameters["numeric_only"].default + assert default is not True + + if default is None or default is lib.no_default: + expected = getattr(df[["a"]], kernel)(*args) + warn = FutureWarning + else: + # default must be False and works on any nuisance columns + expected = getattr(df, kernel)(*args) + if kernel == "mode": + assert "b" in expected.columns + else: + assert "b" in expected.index + warn = None + msg = f"The default value of numeric_only in DataFrame.{kernel}" + with tm.assert_produces_warning(warn, match=msg): + result = getattr(df, kernel)(*args) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "kernel", + [ "idxmax", "idxmin", "kurt", - "kurt", "max", "mean", "median", "min", - "mode", "prod", - "prod", - "quantile", "sem", "skew", "std", @@ -1774,32 +1748,16 @@ def test_reduction_axis_none_deprecation(method): "var", ], ) -def test_numeric_only_deprecation(kernel): +def test_fails_on_non_numeric(kernel): # GH#46852 df = DataFrame({"a": [1, 2, 3], "b": object}) - args = (df,) if kernel == "corrwith" else () - signature = inspect.signature(getattr(DataFrame, kernel)) - default = signature.parameters["numeric_only"].default - assert default is not True - - if kernel in ("idxmax", "idxmin"): - # kernels that default to numeric_only=False and fail on nuisance columns - assert default is False - with pytest.raises(TypeError, match="not allowed for this dtype"): - getattr(df, kernel)(*args) - else: - if default is None or default is lib.no_default: - expected = getattr(df[["a"]], kernel)(*args) - warn = FutureWarning - else: - # default must be False and works on any nuisance columns - expected = getattr(df, kernel)(*args) - if kernel == "mode": - assert "b" in expected.columns - else: - assert "b" in expected.index - warn = None - msg = f"The default value of numeric_only in DataFrame.{kernel}" - with tm.assert_produces_warning(warn, match=msg): - result = getattr(df, kernel)(*args) - tm.assert_equal(result, expected) + msg = 
"|".join( + [ + "not allowed for this dtype", + "argument must be a string or a number", + "not supported between instances of", + "unsupported operand type", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(df, kernel)() diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 935c39af8af3a..a93faa2275251 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -971,13 +971,12 @@ def test_apply_function_with_indexing_return_column(): # GH: 7002 df = DataFrame( { - "foo1": ["one", "two", "two", "three", "one", "two"], + "foo1": [1, 2, 2, 3, 1, 2], "foo2": [1, 2, 4, 4, 5, 6], } ) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) - expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) + result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) + expected = DataFrame({"foo1": [1.0, 2.0, 3.0], "foo2": [3.0, 4.0, 4.0]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 092fd4a4d6be0..ae8e7928d12b8 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -298,7 +298,7 @@ def test_apply(ordered): # is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"]) # when we expect Series(0., index=["values"]) with tm.assert_produces_warning( - FutureWarning, match="Select only valid", check_stacklevel=False + None, match="Select only valid", check_stacklevel=False ): result = grouped.apply(lambda x: np.mean(x)) tm.assert_frame_equal(result, expected) From 7a98ef0e2aa5044ce3ad7af8b041d999e37a48e2 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 5 Nov 2022 07:21:29 -0400 Subject: [PATCH 2/8] DEPR: Enforce deprecation of numeric_only=None in DataFrame aggregations --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/frame.py | 107 ++----- pandas/tests/apply/test_frame_apply.py | 18 +- pandas/tests/frame/methods/test_quantile.py | 16 +- pandas/tests/frame/test_reductions.py | 296 +++++++++----------- pandas/tests/groupby/test_apply.py | 8 +- pandas/tests/groupby/test_categorical.py | 8 +- pandas/tests/groupby/test_function.py | 41 ++- 8 files changed, 179 insertions(+), 317 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index df190a4df393c..1e7f936e0504c 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -428,7 +428,7 @@ Removal of prior version deprecations/changes - Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`) - Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`) - Change behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`) -- +- Enforced deprecation of silently dropping columns that raised in DataFrame reductions (:issue:`41480`) .. --------------------------------------------------------------------------- .. 
_whatsnew_200.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a8631f42fb2d6..afee6c647ebf8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -134,7 +134,6 @@ is_iterator, is_list_like, is_numeric_dtype, - is_object_dtype, is_scalar, is_sequence, needs_i8_conversion, @@ -266,9 +265,8 @@ you to specify a location to update with some value.""", } -_numeric_only_doc = """numeric_only : bool or None, default None - Include only float, int, boolean data. If None, will attempt to use - everything, then use only numeric data +_numeric_only_doc = """numeric_only : bool, default False + Include only float, int, boolean data. """ _merge_doc = """ @@ -10506,7 +10504,7 @@ def _reduce( *, axis: Axis = 0, skipna: bool = True, - numeric_only: bool | None = None, + numeric_only: bool = False, filter_type=None, **kwds, ): @@ -10515,13 +10513,8 @@ def _reduce( # TODO: Make other agg func handle axis=None properly GH#21597 axis = self._get_axis_number(axis) - labels = self._get_agg_axis(axis) assert axis in [0, 1] - def func(values: np.ndarray): - # We only use this in the case that operates on self.values - return op(values, axis=axis, skipna=skipna, **kwds) - def blk_func(values, axis: Axis = 1): if isinstance(values, ExtensionArray): if not is_1d_only_ea_dtype(values.dtype) and not isinstance( @@ -10541,84 +10534,26 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - numeric_only_bool = com.resolve_numeric_only(numeric_only) - if numeric_only is not None or axis == 0: - # For numeric_only non-None and axis non-None, we know - # which blocks to use and no try/except is needed. - # For numeric_only=None only the case with axis==0 and no object - # dtypes are unambiguous can be handled with BlockManager.reduce - # Case with EAs see GH#35881 - df = self - if numeric_only_bool: - df = _get_data() - if axis == 1: - df = df.T - axis = 0 - - ignore_failures = numeric_only is None - - # After possibly _get_data and transposing, we are now in the - # simple case where we can use BlockManager.reduce - res, _ = df._mgr.reduce(blk_func, ignore_failures=ignore_failures) - out = df._constructor(res).iloc[0] - if out_dtype is not None: - out = out.astype(out_dtype) - if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: - # Even if we are object dtype, follow numpy and return - # float64, see test_apply_funcs_over_empty - out = out.astype(np.float64) - - if numeric_only is None and out.shape[0] != df.shape[1]: - # columns have been dropped GH#41480 - com.deprecate_numeric_only_default( - type(self), name, deprecate_none=True - ) - - return out - - assert numeric_only is None - - data = self - values = data.values - - try: - result = func(values) - - except TypeError: - # e.g. in nanops trying to convert strs to float - - data = _get_data() - labels = data._get_agg_axis(axis) - - values = data.values - with np.errstate(all="ignore"): - result = func(values) - - # columns have been dropped GH#41480 - arg_name = "numeric_only" - if name in ["all", "any"]: - arg_name = "bool_only" - warnings.warn( - "Dropping of nuisance columns in DataFrame reductions " - f"(with '{arg_name}=None') is deprecated; in a future " - "version this will raise TypeError. 
Select only valid " - "columns before calling the reduction.", - FutureWarning, - stacklevel=find_stack_level(), - ) + # Case with EAs see GH#35881 + df = self + if numeric_only: + df = _get_data() + if axis == 1: + df = df.T + axis = 0 - if hasattr(result, "dtype"): - if filter_type == "bool" and notna(result).all(): - result = result.astype(np.bool_) - elif filter_type is None and is_object_dtype(result.dtype): - try: - result = result.astype(np.float64) - except (ValueError, TypeError): - # try to coerce to the original dtypes item by item if we can - pass + # After possibly _get_data and transposing, we are now in the + # simple case where we can use BlockManager.reduce + res, _ = df._mgr.reduce(blk_func, ignore_failures=False) + out = df._constructor(res).iloc[0] + if out_dtype is not None: + out = out.astype(out_dtype) + if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: + # Even if we are object dtype, follow numpy and return + # float64, see test_apply_funcs_over_empty + out = out.astype(np.float64) - result = self._constructor_sliced(result, index=labels) - return result + return out def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: """ diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 068ce32b5e7aa..7f933267ca443 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1283,8 +1283,11 @@ def test_nuiscance_columns(): ) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.agg("sum") + msg = "DataFrame constructor called with incompatible data and dtype" + with pytest.raises(TypeError, match=msg): + df.agg("sum") + + result = df[["A", "B", "C"]].agg("sum") expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) @@ -1426,17 +1429,6 @@ def test_apply_datetime_tz_issue(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})]) -@pytest.mark.parametrize("method", ["min", "max", "sum"]) -def test_consistency_of_aggregates_of_columns_with_missing_values(df, method): - # GH 16832 - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - none_in_first_column_result = getattr(df[["A", "B"]], method)() - none_in_second_column_result = getattr(df[["B", "A"]], method)() - - tm.assert_series_equal(none_in_first_column_result, none_in_second_column_result) - - @pytest.mark.parametrize("col", [1, 1.0, True, "a", np.nan]) def test_apply_dtype(col): # GH 31466 diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 3beb201bcfa05..3b2b291130163 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -133,21 +133,11 @@ def test_empty(self, interp_method): ) assert np.isnan(q["x"]) and np.isnan(q["y"]) - def test_non_numeric_exclusion(self, interp_method, request, using_array_manager): + def test_non_numeric(self, interp_method, request, using_array_manager): interpolation, method = interp_method df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - rs = df.quantile( - 0.5, numeric_only=True, interpolation=interpolation, method=method - ) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - xp = df.median().rename(0.5) - if interpolation == "nearest": - xp = (xp + 0.5).astype(np.int64) - if method == "table" and 
using_array_manager: - request.node.add_marker( - pytest.mark.xfail(reason="Axis name incorrectly set.") - ) - tm.assert_series_equal(rs, xp) + with pytest.raises(TypeError, match="could not convert"): + df.median().rename(0.5) def test_axis(self, interp_method, request, using_array_manager): # axis diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 8d4d705296f35..141b852390fce 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -169,15 +169,23 @@ class TestDataFrameAnalytics: ], ) def test_stat_op_api_float_string_frame(self, float_string_frame, axis, opname): - if opname in ["sum", "min", "max"] and axis == 0: - warn = None - elif opname not in ["count", "nunique"]: - warn = FutureWarning - else: - warn = None - msg = "nuisance columns|default value of numeric_only" - with tm.assert_produces_warning(warn, match=msg): + if (opname in ("sum", "min", "max") and axis == 0) or opname in ( + "count", + "nunique", + ): getattr(float_string_frame, opname)(axis=axis) + else: + msg = "|".join( + [ + "Could not convert", + "could not convert", + "can't multiply sequence by non-int", + "unsupported operand type", + "not supported between instances of", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(float_string_frame, opname)(axis=axis) if opname != "nunique": getattr(float_string_frame, opname)(axis=axis, numeric_only=True) @@ -337,18 +345,26 @@ def test_mixed_ops(self, op): "str": ["a", "b", "c", "d"], } ) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, op)() - assert len(result) == 2 + msg = "|".join( + [ + "Could not convert", + "could not convert", + "can't multiply sequence by non-int", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(df, op)() with pd.option_context("use_bottleneck", False): - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, op)() - assert len(result) == 2 + msg = "|".join( + [ + "Could not convert", + "could not convert", + "can't multiply sequence by non-int", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(df, op)() def test_reduce_mixed_frame(self): # GH 6806 @@ -416,10 +432,9 @@ def test_mean_mixed_string_decimal(self): df = DataFrame(d) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.mean() + with pytest.raises(TypeError, match="unsupported operand type"): + df.mean() + result = df[["A", "C"]].mean() expected = Series([2.7, 681.6], index=["A", "C"]) tm.assert_series_equal(result, expected) @@ -648,9 +663,8 @@ def test_operators_timedelta64(self): ) tm.assert_series_equal(result, expected) - # excludes numeric - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = mixed.min(axis=1) + # excludes non-numeric + result = mixed.min(axis=1, numeric_only=True) expected = Series([1, 1, 1.0], index=[0, 1, 2]) tm.assert_series_equal(result, expected) @@ -819,25 +833,17 @@ def test_sum_mixed_datetime(self): df = DataFrame({"A": date_range("2000", periods=4), "B": [1, 2, 3, 4]}).reindex( [2, 3, 4] ) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.sum() - - expected = Series({"B": 7.0}) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction 'sum'"): + df.sum() def test_mean_corner(self, float_frame, float_string_frame): # unit test when have 
object data - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - the_mean = float_string_frame.mean(axis=0) - the_sum = float_string_frame.sum(axis=0, numeric_only=True) - tm.assert_index_equal(the_sum.index, the_mean.index) - assert len(the_mean.index) < len(float_string_frame.columns) + with pytest.raises(TypeError, match="Could not convert"): + float_string_frame.mean(axis=0) # xs sum mixed type, just want to know it works... - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - the_mean = float_string_frame.mean(axis=1) - the_sum = float_string_frame.sum(axis=1, numeric_only=True) - tm.assert_index_equal(the_sum.index, the_mean.index) + with pytest.raises(TypeError, match="unsupported operand type"): + float_string_frame.mean(axis=1) # take mean of boolean column float_frame["bool"] = float_frame["A"] > 0 @@ -861,10 +867,8 @@ def test_mean_datetimelike(self): expected = Series({"A": 1.0}) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.mean() - expected = Series({"A": 1.0, "B": df.loc[1, "B"], "C": df.loc[1, "C"]}) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="mean is not implemented for PeriodArray"): + df.mean() def test_mean_datetimelike_numeric_only_false(self): df = DataFrame( @@ -895,13 +899,13 @@ def test_mean_extensionarray_numeric_only_true(self): tm.assert_series_equal(result, expected) def test_stats_mixed_type(self, float_string_frame): - # don't blow up - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): + with pytest.raises(TypeError, match="could not convert"): float_string_frame.std(1) + with pytest.raises(TypeError, match="could not convert"): float_string_frame.var(1) + with pytest.raises(TypeError, match="unsupported operand type"): float_string_frame.mean(1) + with pytest.raises(TypeError, match="could not convert"): float_string_frame.skew(1) def test_sum_bools(self): @@ -1250,24 +1254,26 @@ def test_any_all_np_func(self, func, data, expected): # GH 19976 data = DataFrame(data) - warn = None if any(is_categorical_dtype(x) for x in data.dtypes): - warn = FutureWarning + with pytest.raises( + TypeError, match="dtype category does not support reduction" + ): + func(data) - with tm.assert_produces_warning( - warn, match="Select only valid columns", check_stacklevel=False - ): + # method version + with pytest.raises( + TypeError, match="dtype category does not support reduction" + ): + getattr(DataFrame(data), func.__name__)(axis=None) + else: result = func(data) - assert isinstance(result, np.bool_) - assert result.item() is expected + assert isinstance(result, np.bool_) + assert result.item() is expected - # method version - with tm.assert_produces_warning( - warn, match="Select only valid columns", check_stacklevel=False - ): + # method version result = getattr(DataFrame(data), func.__name__)(axis=None) - assert isinstance(result, np.bool_) - assert result.item() is expected + assert isinstance(result, np.bool_) + assert result.item() is expected def test_any_all_object(self): # GH 19976 @@ -1512,20 +1518,11 @@ def test_any_all_categorical_dtype_nuisance_column(self, method): with pytest.raises(TypeError, match="does not support reduction"): getattr(df, method)(bool_only=False) - # With bool_only=None, operating on this column raises and is ignored, - # so we expect an empty result. 
- with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, method)(bool_only=None) - expected = Series([], index=Index([]), dtype=bool) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + getattr(df, method)(bool_only=None) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns", check_stacklevel=False - ): - result = getattr(np, method)(df, axis=0) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + getattr(np, method)(df, axis=0) def test_median_categorical_dtype_nuisance_column(self): # GH#21020 DataFrame.median should match Series.median @@ -1539,12 +1536,8 @@ def test_median_categorical_dtype_nuisance_column(self): with pytest.raises(TypeError, match="does not support reduction"): df.median(numeric_only=False) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.median() - expected = Series([], index=Index([]), dtype=np.float64) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + df.median() # same thing, but with an additional non-categorical column df["B"] = df["A"].astype(int) @@ -1552,12 +1545,8 @@ def test_median_categorical_dtype_nuisance_column(self): with pytest.raises(TypeError, match="does not support reduction"): df.median(numeric_only=False) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.median() - expected = Series([2.0], index=["B"]) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + df.median() # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead # of expected.values @@ -1579,58 +1568,19 @@ def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): with pytest.raises(TypeError, match="is not ordered for operation"): getattr(df, method)(numeric_only=False) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, method)() - expected = Series([], index=Index([]), dtype=np.float64) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(df, method)() - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns", check_stacklevel=False - ): - result = getattr(np, method)(df) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(np, method)(df) # same thing, but with an additional non-categorical column df["B"] = df["A"].astype(object) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = getattr(df, method)() - if method == "min": - expected = Series(["a"], index=["B"]) - else: - expected = Series(["c"], index=["B"]) - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns", check_stacklevel=False - ): - result = getattr(np, method)(df) - tm.assert_series_equal(result, expected) - - def test_reduction_object_block_splits_nuisance_columns(self): - # GH#37827 - df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object) - - # We should only exclude "B", not "A" - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = 
df.mean() - expected = Series([1.0], index=["A"]) - tm.assert_series_equal(result, expected) - - # Same behavior but heterogeneous dtype - df["C"] = df["A"].astype(int) + 4 + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(df, method)() - with tm.assert_produces_warning( - FutureWarning, match="Select only valid columns" - ): - result = df.mean() - expected = Series([1.0, 5.0], index=["A", "C"]) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="is not ordered for operation"): + getattr(np, method)(df) def test_sum_timedelta64_skipna_false(using_array_manager, request): @@ -1710,12 +1660,8 @@ def test_groupby_regular_arithmetic_equivalent(meth): def test_frame_mixed_numeric_object_with_timestamp(ts_value): # GH 13912 df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]}) - with tm.assert_produces_warning( - FutureWarning, match="The default value of numeric_only" - ): - result = df.sum() - expected = Series([1, 1.1, "foo"], index=list("abc")) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match="does not support reduction"): + df.sum() def test_prod_sum_min_count_mixed_object(): @@ -1755,18 +1701,46 @@ def test_reduction_axis_none_deprecation(method): "corrwith", "count", "cov", + "mode", + "quantile", + ], +) +def test_numeric_only_deprecation(kernel): + # GH#46852 + df = DataFrame({"a": [1, 2, 3], "b": object}) + args = (df,) if kernel == "corrwith" else () + signature = inspect.signature(getattr(DataFrame, kernel)) + default = signature.parameters["numeric_only"].default + assert default is not True + + if default is None or default is lib.no_default: + expected = getattr(df[["a"]], kernel)(*args) + warn = FutureWarning + else: + # default must be False and works on any nuisance columns + expected = getattr(df, kernel)(*args) + if kernel == "mode": + assert "b" in expected.columns + else: + assert "b" in expected.index + warn = None + msg = f"The default value of numeric_only in DataFrame.{kernel}" + with tm.assert_produces_warning(warn, match=msg): + result = getattr(df, kernel)(*args) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "kernel", + [ "idxmax", "idxmin", "kurt", - "kurt", "max", "mean", "median", "min", - "mode", "prod", - "prod", - "quantile", "sem", "skew", "std", @@ -1774,32 +1748,16 @@ def test_reduction_axis_none_deprecation(method): "var", ], ) -def test_numeric_only_deprecation(kernel): +def test_fails_on_non_numeric(kernel): # GH#46852 df = DataFrame({"a": [1, 2, 3], "b": object}) - args = (df,) if kernel == "corrwith" else () - signature = inspect.signature(getattr(DataFrame, kernel)) - default = signature.parameters["numeric_only"].default - assert default is not True - - if kernel in ("idxmax", "idxmin"): - # kernels that default to numeric_only=False and fail on nuisance columns - assert default is False - with pytest.raises(TypeError, match="not allowed for this dtype"): - getattr(df, kernel)(*args) - else: - if default is None or default is lib.no_default: - expected = getattr(df[["a"]], kernel)(*args) - warn = FutureWarning - else: - # default must be False and works on any nuisance columns - expected = getattr(df, kernel)(*args) - if kernel == "mode": - assert "b" in expected.columns - else: - assert "b" in expected.index - warn = None - msg = f"The default value of numeric_only in DataFrame.{kernel}" - with tm.assert_produces_warning(warn, match=msg): - result = getattr(df, kernel)(*args) - tm.assert_equal(result, expected) + msg = 
"|".join( + [ + "not allowed for this dtype", + "argument must be a string or a number", + "not supported between instances of", + "unsupported operand type", + ] + ) + with pytest.raises(TypeError, match=msg): + getattr(df, kernel)() diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 935c39af8af3a..7b6049cc4b5d9 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -968,17 +968,15 @@ def test_apply_function_index_return(function): def test_apply_function_with_indexing_return_column(): - # GH: 7002 + # GH#7002, GH#41480 df = DataFrame( { "foo1": ["one", "two", "two", "three", "one", "two"], "foo2": [1, 2, 4, 4, 5, 6], } ) - with tm.assert_produces_warning(FutureWarning, match="Select only valid"): - result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) - expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) - tm.assert_frame_equal(result, expected) + with pytest.raises(TypeError, match="Could not convert"): + df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) @pytest.mark.parametrize( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 092fd4a4d6be0..6a79099de38d4 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -294,13 +294,7 @@ def test_apply(ordered): idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) expected = DataFrame([0, 1, 2.0], index=idx, columns=["values"]) - # GH#21636 tracking down the xfail, in some builds np.mean(df.loc[[0]]) - # is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"]) - # when we expect Series(0., index=["values"]) - with tm.assert_produces_warning( - FutureWarning, match="Select only valid", check_stacklevel=False - ): - result = grouped.apply(lambda x: np.mean(x)) + result = grouped.apply(lambda x: np.mean(x)) tm.assert_frame_equal(result, expected) result = grouped.mean() diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 7a9d540ae08c4..5383a4d28c8ce 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -301,27 +301,19 @@ def gni(self, df): return gni # TODO: non-unique columns, as_index=False - def test_idxmax(self, gb): - # object dtype so idxmax goes through _aggregate_item_by_item - # GH#5610 - # non-cython calls should not include the grouper + def test_idxmax_nuisance_raises(self, gb): + # GH#5610, GH#41480 expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - msg = "The default value of numeric_only in DataFrameGroupBy.idxmax" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.idxmax() - tm.assert_frame_equal(result, expected) + with pytest.raises(TypeError, match="not allowed for this dtype"): + gb.idxmax() - def test_idxmin(self, gb): - # object dtype so idxmax goes through _aggregate_item_by_item - # GH#5610 - # non-cython calls should not include the grouper + def test_idxmin_nuisance_raises(self, gb): + # GH#5610, GH#41480 expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - msg = "The default value of numeric_only in DataFrameGroupBy.idxmin" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.idxmin() - tm.assert_frame_equal(result, expected) + with pytest.raises(TypeError, match="not allowed for this dtype"): + gb.idxmin() def 
test_describe(self, df, gb, gni): # describe @@ -1382,11 +1374,15 @@ def test_deprecate_numeric_only( gb = df.groupby(keys) method = getattr(gb, kernel) - if has_arg and ( - # Cases where b does not appear in the result - numeric_only is True - or (numeric_only is lib.no_default and numeric_only_default) - or drops_nuisance + if ( + has_arg + and (kernel not in ("idxmax", "idxmin") or numeric_only is True) + and ( + # Cases where b does not appear in the result + numeric_only is True + or (numeric_only is lib.no_default and numeric_only_default) + or drops_nuisance + ) ): if numeric_only is True or (not numeric_only_default and not drops_nuisance): warn = None @@ -1411,9 +1407,8 @@ def test_deprecate_numeric_only( ): result = method(*args, **kwargs) assert "b" in result.columns - elif has_arg: + elif has_arg or kernel in ("idxmax", "idxmin"): assert numeric_only is not True - assert numeric_only is not lib.no_default or numeric_only_default is False assert not drops_nuisance # kernels that are successful on any dtype were above; this will fail msg = ( From 9faf0a90e0ffbdb43d8a0ef351b8b7ef9d5571de Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 5 Nov 2022 17:41:14 -0400 Subject: [PATCH 3/8] Partial reverts --- pandas/tests/apply/test_frame_apply.py | 12 ++++++++++++ pandas/tests/frame/methods/test_quantile.py | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 7f933267ca443..28c776d0a6d35 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1429,6 +1429,18 @@ def test_apply_datetime_tz_issue(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})]) +@pytest.mark.parametrize("method", ["min", "max", "sum"]) +def test_mixed_column_raises(df, method): + # GH 16832 + if method == "sum": + msg = r'can only concatenate str \(not "int"\) to str' + else: + msg = "not supported between instances of 'str' and 'float'" + with pytest.raises(TypeError, match=msg): + getattr(df, method)() + + @pytest.mark.parametrize("col", [1, 1.0, True, "a", np.nan]) def test_apply_dtype(col): # GH 31466 diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 3b2b291130163..bd5138adaae5d 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -133,11 +133,20 @@ def test_empty(self, interp_method): ) assert np.isnan(q["x"]) and np.isnan(q["y"]) - def test_non_numeric(self, interp_method, request, using_array_manager): + def test_non_numeric_exclusion(self, interp_method, request, using_array_manager): interpolation, method = interp_method df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - with pytest.raises(TypeError, match="could not convert"): - df.median().rename(0.5) + rs = df.quantile( + 0.5, numeric_only=True, interpolation=interpolation, method=method + ) + xp = df.median(numeric_only=True).rename(0.5) + if interpolation == "nearest": + xp = (xp + 0.5).astype(np.int64) + if method == "table" and using_array_manager: + request.node.add_marker( + pytest.mark.xfail(reason="Axis name incorrectly set.") + ) + tm.assert_series_equal(rs, xp) def test_axis(self, interp_method, request, using_array_manager): # axis From 9b297935bd99e651452361531e4109521c1c9f1f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Nov 2022 09:30:30 -0500 Subject: 
[PATCH 4/8] numeric_only in generic/series, fixup --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/frame.py | 73 ++++++++++++++++++------- pandas/core/generic.py | 77 ++++++++++++--------------- pandas/core/series.py | 2 +- pandas/tests/frame/test_reductions.py | 4 +- 5 files changed, 91 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1e7f936e0504c..930821a9aefd2 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -496,6 +496,7 @@ Timezones Numeric ^^^^^^^ - Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`) +- Bug in DataFrame reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` would not be coerced to float (:issue:`49551`) - Conversion diff --git a/pandas/core/frame.py b/pandas/core/frame.py index afee6c647ebf8..ba7195c836fa7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10515,6 +10515,10 @@ def _reduce( axis = self._get_axis_number(axis) assert axis in [0, 1] + def func(values: np.ndarray): + # We only use this in the case that operates on self.values + return op(values, axis=axis, skipna=skipna, **kwds) + def blk_func(values, axis: Axis = 1): if isinstance(values, ExtensionArray): if not is_1d_only_ea_dtype(values.dtype) and not isinstance( @@ -10534,26 +10538,59 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - # Case with EAs see GH#35881 - df = self - if numeric_only: - df = _get_data() - if axis == 1: - df = df.T - axis = 0 + if numeric_only or axis == 0: + # For numeric_only non-None and axis non-None, we know + # which blocks to use and no try/except is needed. + # For numeric_only=None only the case with axis==0 and no object + # dtypes are unambiguous can be handled with BlockManager.reduce + # Case with EAs see GH#35881 + df = self + if numeric_only: + df = _get_data() + if axis == 1: + df = df.T + axis = 0 + + # After possibly _get_data and transposing, we are now in the + # simple case where we can use BlockManager.reduce + res, _ = df._mgr.reduce(blk_func, ignore_failures=False) + out = df._constructor(res).iloc[0] + if out_dtype is not None: + out = out.astype(out_dtype) + if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: + # Even if we are object dtype, follow numpy and return + # float64, see test_apply_funcs_over_empty + out = out.astype(np.float64) + + if numeric_only is None and out.shape[0] != df.shape[1]: + # columns have been dropped GH#41480 + com.deprecate_numeric_only_default( + type(self), name, deprecate_none=True + ) - # After possibly _get_data and transposing, we are now in the - # simple case where we can use BlockManager.reduce - res, _ = df._mgr.reduce(blk_func, ignore_failures=False) - out = df._constructor(res).iloc[0] - if out_dtype is not None: - out = out.astype(out_dtype) - if axis == 0 and len(self) == 0 and name in ["sum", "prod"]: - # Even if we are object dtype, follow numpy and return - # float64, see test_apply_funcs_over_empty - out = out.astype(np.float64) + return out - return out + assert not numeric_only and axis == 1 + + data = self + values = data.values + result = func(values) + + from pandas.core.dtypes.common import is_object_dtype + + if hasattr(result, "dtype"): + if filter_type == "bool" and notna(result).all(): + result = result.astype(np.bool_) + elif filter_type is None and is_object_dtype(result.dtype): + try: + result = result.astype(np.float64) + 
except (ValueError, TypeError): + # try to coerce to the original dtypes item by item if we can + pass + + labels = self._get_agg_axis(axis) + result = self._constructor_sliced(result, index=labels) + return result def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4a0f31357079f..ae6806f590568 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10844,7 +10844,7 @@ def _logical_func( name: str, func, axis: Axis = 0, - bool_only: bool_t | None = None, + bool_only: bool_t = False, skipna: bool_t = True, level: Level | None = None, **kwargs, @@ -10859,7 +10859,7 @@ def _logical_func( FutureWarning, stacklevel=find_stack_level(), ) - if bool_only is not None: + if bool_only: raise NotImplementedError( "Option bool_only is not implemented with option level." ) @@ -10878,7 +10878,6 @@ def _logical_func( and len(self._mgr.arrays) > 1 # TODO(EA2D): special-case not needed and all(x.ndim == 2 for x in self._mgr.arrays) - and bool_only is not None and not kwargs ): # Fastpath avoiding potentially expensive transpose @@ -10899,7 +10898,7 @@ def _logical_func( def any( self, axis: Axis = 0, - bool_only: bool_t | None = None, + bool_only: bool_t = False, skipna: bool_t = True, level: Level | None = None, **kwargs, @@ -10911,7 +10910,7 @@ def any( def all( self, axis: Axis = 0, - bool_only: bool_t | None = None, + bool_only: bool_t = False, skipna: bool_t = True, level: Level | None = None, **kwargs, @@ -10978,7 +10977,7 @@ def _stat_function_ddof( skipna: bool_t = True, level: Level | None = None, ddof: int = 1, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: nv.validate_stat_ddof_func((), kwargs, fname=name) @@ -11006,7 +11005,7 @@ def sem( skipna: bool_t = True, level: Level | None = None, ddof: int = 1, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function_ddof( @@ -11019,7 +11018,7 @@ def var( skipna: bool_t = True, level: Level | None = None, ddof: int = 1, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function_ddof( @@ -11032,7 +11031,7 @@ def std( skipna: bool_t = True, level: Level | None = None, ddof: int = 1, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function_ddof( @@ -11047,7 +11046,7 @@ def _stat_function( axis: Axis | None | lib.NoDefault = None, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ): if name == "median": @@ -11092,7 +11091,7 @@ def min( axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ): return self._stat_function( @@ -11110,7 +11109,7 @@ def max( axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ): return self._stat_function( @@ -11128,7 +11127,7 @@ def mean( axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function( @@ -11140,7 +11139,7 @@ def median( axis: Axis | None | lib.NoDefault = 
lib.no_default, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function( @@ -11152,7 +11151,7 @@ def skew( axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function( @@ -11164,7 +11163,7 @@ def kurt( axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function( @@ -11181,7 +11180,7 @@ def _min_count_stat_function( axis: Axis | None = None, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): @@ -11227,7 +11226,7 @@ def sum( axis: Axis | None = None, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): @@ -11240,7 +11239,7 @@ def prod( axis: Axis | None = None, skipna: bool_t = True, level: Level | None = None, - numeric_only: bool_t | None = None, + numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): @@ -11333,7 +11332,7 @@ def sem( skipna: bool_t = True, level=None, ddof: int = 1, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) @@ -11356,7 +11355,7 @@ def var( skipna: bool_t = True, level=None, ddof: int = 1, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs) @@ -11380,7 +11379,7 @@ def std( skipna: bool_t = True, level=None, ddof: int = 1, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) @@ -11468,7 +11467,7 @@ def sum( axis: Axis | None = None, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): @@ -11493,7 +11492,7 @@ def prod( axis: Axis | None = None, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): @@ -11519,7 +11518,7 @@ def mean( axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) @@ -11541,7 +11540,7 @@ def skew( axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) @@ -11566,7 +11565,7 @@ def kurt( axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) @@ -11589,7 +11588,7 @@ def median( axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) @@ -11613,7 +11612,7 @@ def max( axis: AxisInt | None | lib.NoDefault = 
lib.no_default, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) @@ -11637,7 +11636,7 @@ def min( axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, level=None, - numeric_only=None, + numeric_only: bool_t = False, **kwargs, ): return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) @@ -11872,13 +11871,8 @@ def _doc_params(cls): .. deprecated:: 1.3.0 The level keyword is deprecated. Use groupby instead. -numeric_only : bool, default None - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. Not implemented for Series. - - .. deprecated:: 1.5.0 - Specifying ``numeric_only=None`` is deprecated. The default value will be - ``False`` in a future version of pandas. +numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. {min_count}\ **kwargs @@ -11910,13 +11904,8 @@ def _doc_params(cls): ddof : int, default 1 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. -numeric_only : bool, default None - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. Not implemented for Series. - - .. deprecated:: 1.5.0 - Specifying ``numeric_only=None`` is deprecated. The default value will be - ``False`` in a future version of pandas. +numeric_only : bool, default False + Include only float, int, boolean columns. Not implemented for Series. Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index ac9570a69f283..9b38b47a3c898 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4611,7 +4611,7 @@ def _reduce( *, axis: Axis = 0, skipna: bool = True, - numeric_only=None, + numeric_only: bool = False, filter_type=None, **kwds, ): diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 141b852390fce..635321a340e67 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -331,9 +331,7 @@ def test_stat_operators_attempt_obj_array(self, method, df): assert df.values.dtype == np.object_ result = getattr(df, method)(1) expected = getattr(df.astype("f8"), method)(1) - - if method in ["sum", "prod"]: - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"]) def test_mixed_ops(self, op): From 48bf1eb79cdbce388be2e9439643e94d906080ce Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Nov 2022 09:31:48 -0500 Subject: [PATCH 5/8] cleanup --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ba7195c836fa7..a17e573e0cade 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -134,6 +134,7 @@ is_iterator, is_list_like, is_numeric_dtype, + is_object_dtype, is_scalar, is_sequence, needs_i8_conversion, @@ -10576,8 +10577,6 @@ def _get_data() -> DataFrame: values = data.values result = func(values) - from pandas.core.dtypes.common import is_object_dtype - if hasattr(result, "dtype"): if filter_type == "bool" and notna(result).all(): result = result.astype(np.bool_) From 2535f7a04307dbbebe66781aa16c5c4e5056b18c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Nov 2022 09:48:22 -0500 Subject: [PATCH 6/8] 
Remove docs warning --- doc/source/user_guide/groupby.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index dae42dd4f1118..dbb990cf01767 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1187,12 +1187,6 @@ is only interesting over one column (here ``colname``), it may be filtered If you do wish to include decimal or object columns in an aggregation with other non-nuisance data types, you must do so explicitly. -.. warning:: - The automatic dropping of nuisance columns has been deprecated and will be removed - in a future version of pandas. If columns are included that cannot be operated - on, pandas will instead raise an error. In order to avoid this, either select - the columns you wish to operate on or specify ``numeric_only=True``. - .. ipython:: python :okwarning: From aadbc17e59a39f637ec499f8975b4990b341a461 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Nov 2022 11:24:02 -0500 Subject: [PATCH 7/8] fixups --- doc/source/whatsnew/v1.2.0.rst | 19 ++++++++++++------- pandas/tests/frame/test_reductions.py | 1 + 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c5f2dbe71cb3c..fc8b59e11e001 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -383,12 +383,17 @@ this pathological behavior (:issue:`37827`): *New behavior*: -.. ipython:: python - :okwarning: +.. code-block:: ipython - df.mean() + In [3]: df.mean() + Out[3]: + A 1.0 + dtype: float64 - df[["A"]].mean() + In [4]: df[["A"]].mean() + Out[4]: + A 1.0 + dtype: float64 Moreover, DataFrame reductions with ``numeric_only=None`` will now be consistent with their Series counterparts. In particular, for @@ -415,10 +420,10 @@ instead of casting to a NumPy array which may have different semantics (:issue:` *New behavior*: -.. ipython:: python - :okwarning: +.. code-block:: ipython - df.any() + In [5]: df.any() + Out[5]: Series([], dtype: bool) .. _whatsnew_120.api_breaking.python: diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 635321a340e67..0e5c6057b9a61 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1755,6 +1755,7 @@ def test_fails_on_non_numeric(kernel): "argument must be a string or a number", "not supported between instances of", "unsupported operand type", + "argument must be a string or a real number", ] ) with pytest.raises(TypeError, match=msg): From 811bea5247a3239c97e18a02e3f772823e4b4c71 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 7 Nov 2022 21:03:54 -0500 Subject: [PATCH 8/8] Fixups --- doc/source/user_guide/groupby.rst | 6 ++++++ doc/source/whatsnew/v2.0.0.rst | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index dbb990cf01767..dae42dd4f1118 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1187,6 +1187,12 @@ is only interesting over one column (here ``colname``), it may be filtered If you do wish to include decimal or object columns in an aggregation with other non-nuisance data types, you must do so explicitly. +.. warning:: + The automatic dropping of nuisance columns has been deprecated and will be removed + in a future version of pandas. If columns are included that cannot be operated + on, pandas will instead raise an error. 
In order to avoid this, either select
+    the columns you wish to operate on or specify ``numeric_only=True``.
+
 .. ipython:: python
    :okwarning:

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 4281aaa97fde2..2c557148f4880 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -452,7 +452,7 @@ Removal of prior version deprecations/changes
 - Change behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
 - Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it as a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__` behavior (:issue:`33469`)
 - Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
-- Enforced deprecation of silently dropping columns that raised in DataFrame reductions (:issue:`41480`)
+- Enforced deprecation of ``numeric_only=None`` (the default) in DataFrame reductions that would silently drop columns that raised; ``numeric_only`` now defaults to ``False`` (:issue:`41480`)
 -

.. ----------------------------------------------------------------------------
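
A minimal sketch (not part of the patches above) of the behavior this series enforces,
assuming a pandas build that includes these changes; the frame, column names, and printed
messages are illustrative assumptions only.

import pandas as pd

# A mixed-dtype frame: two numeric columns plus a string ("nuisance") column.
df = pd.DataFrame({"A": [1, 2, 3], "B": [1.5, 2.5, 3.5], "C": ["x", "y", "z"]})

# numeric_only now defaults to False, so the string column is no longer
# silently dropped; the reduction raises instead.
try:
    df.mean()
except TypeError as err:
    print(f"raised as expected: {err}")

# Either select only the valid columns before reducing ...
print(df[["A", "B"]].mean())

# ... or opt in to the numeric-only behavior explicitly.
print(df.mean(numeric_only=True))

# GH 49551: with object dtype, axis=1 and numeric_only=False, the result is now
# coerced to float where possible (previously it was left as object).
obj = pd.DataFrame({"A": [1, 2], "B": [3, 4]}, dtype=object)
print(obj.sum(axis=1).dtype)  # expected: float64

Selecting the valid columns up front, or passing numeric_only=True, replaces the removed
silent-dropping path, which is the same guidance the warning re-added to groupby.rst gives.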