diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f3023bea3743e..70d3efb80fd84 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -181,6 +181,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.SeriesGroupBy.rolling\ pandas.core.groupby.DataFrameGroupBy.hist\ pandas.core.groupby.DataFrameGroupBy.plot\ + pandas.core.groupby.DataFrameGroupBy.corrwith\ pandas.core.groupby.SeriesGroupBy.plot\ pandas.core.window.rolling.Rolling.quantile\ pandas.core.window.expanding.Expanding.quantile\ diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 771163ae1b0bc..3b02ffe20c10e 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -80,7 +80,6 @@ Function application DataFrameGroupBy.describe DataFrameGroupBy.diff DataFrameGroupBy.ffill - DataFrameGroupBy.fillna DataFrameGroupBy.first DataFrameGroupBy.head DataFrameGroupBy.idxmax @@ -131,7 +130,6 @@ Function application SeriesGroupBy.describe SeriesGroupBy.diff SeriesGroupBy.ffill - SeriesGroupBy.fillna SeriesGroupBy.first SeriesGroupBy.head SeriesGroupBy.last diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 11863f8aead31..a25e2ed179b80 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -289,7 +289,7 @@ the number of groups, which is the same as the length of the ``groups`` dictiona In [1]: gb. # noqa: E225, E999 gb.agg gb.boxplot gb.cummin gb.describe gb.filter gb.get_group gb.height gb.last gb.median gb.ngroups gb.plot gb.rank gb.std gb.transform gb.aggregate gb.count gb.cumprod gb.dtype gb.first gb.groups gb.hist gb.max gb.min gb.nth gb.prod gb.resample gb.sum gb.var - gb.apply gb.cummax gb.cumsum gb.fillna gb.gender gb.head gb.indices gb.mean gb.name gb.ohlc gb.quantile gb.size gb.tail gb.weight + gb.apply gb.cummax gb.cumsum gb.gender gb.head gb.indices gb.mean gb.name gb.ohlc gb.quantile gb.size gb.tail gb.weight .. 
_groupby.multiindex: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ebcfd16f92d29..4accf8be46b9e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -101,6 +101,8 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed :meth:`DataFrameGroupBy.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) +- Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index a443597347283..3f776cf75d43a 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -70,7 +70,6 @@ class OutputKey: "cumsum", "diff", "ffill", - "fillna", "ngroup", "pct_change", "rank", diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9598bc0db02cc..351b4bff0162e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -24,10 +24,7 @@ import numpy as np -from pandas._libs import ( - Interval, - lib, -) +from pandas._libs import Interval from pandas._libs.hashtable import duplicated from pandas.errors import SpecificationError from pandas.util._decorators import ( @@ -93,16 +90,13 @@ if TYPE_CHECKING: from collections.abc import ( Hashable, - Mapping, Sequence, ) from pandas._typing import ( ArrayLike, - Axis, AxisInt, CorrelationMethod, - FillnaOptions, IndexLabel, Manager, Manager2D, @@ -878,108 +872,9 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray: result = result.reset_index() return result - def fillna( - self, - value: object | ArrayLike | None = None, - method: FillnaOptions | None = None, - axis: Axis | None | lib.NoDefault = lib.no_default, - inplace: bool = False, - limit: int | None = 
None, - downcast: dict | None | lib.NoDefault = lib.no_default, - ) -> Series | None: - """ - Fill NA/NaN values using the specified method within groups. - - .. deprecated:: 2.2.0 - This method is deprecated and will be removed in a future version. - Use the :meth:`.SeriesGroupBy.ffill` or :meth:`.SeriesGroupBy.bfill` - for forward or backward filling instead. If you want to fill with a - single value, use :meth:`Series.fillna` instead. - - Parameters - ---------- - value : scalar, dict, Series, or DataFrame - Value to use to fill holes (e.g. 0), alternately a - dict/Series/DataFrame of values specifying which value to use for - each index (for a Series) or column (for a DataFrame). Values not - in the dict/Series/DataFrame will not be filled. This value cannot - be a list. Users wanting to use the ``value`` argument and not ``method`` - should prefer :meth:`.Series.fillna` as this - will produce the same result and be more performant. - method : {{'bfill', 'ffill', None}}, default None - Method to use for filling holes. ``'ffill'`` will propagate - the last valid observation forward within a group. - ``'bfill'`` will use next valid observation to fill the gap. - axis : {0 or 'index', 1 or 'columns'} - Unused, only for compatibility with :meth:`DataFrameGroupBy.fillna`. - inplace : bool, default False - Broken. Do not set to True. - limit : int, default None - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill within a group. In other words, - if there is a gap with more than this number of consecutive NaNs, - it will only be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. Must be greater than 0 if not None. - downcast : dict, default is None - A dict of item->dtype of what to downcast if possible, - or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible). 
- - Returns - ------- - Series - Object with missing values filled within groups. - - See Also - -------- - ffill : Forward fill values within a group. - bfill : Backward fill values within a group. - - Examples - -------- - For SeriesGroupBy: - - >>> lst = ['cat', 'cat', 'cat', 'mouse', 'mouse'] - >>> ser = pd.Series([1, None, None, 2, None], index=lst) - >>> ser - cat 1.0 - cat NaN - cat NaN - mouse 2.0 - mouse NaN - dtype: float64 - >>> ser.groupby(level=0).fillna(0, limit=1) - cat 1.0 - cat 0.0 - cat NaN - mouse 2.0 - mouse 0.0 - dtype: float64 - """ - warnings.warn( - f"{type(self).__name__}.fillna is deprecated and " - "will be removed in a future version. Use obj.ffill() or obj.bfill() " - "for forward or backward filling instead. If you want to fill with a " - f"single value, use {type(self.obj).__name__}.fillna instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - result = self._op_via_apply( - "fillna", - value=value, - method=method, - axis=axis, - inplace=inplace, - limit=limit, - downcast=downcast, - ) - return result - def take( self, indices: TakeIndexer, - axis: Axis | lib.NoDefault = lib.no_default, **kwargs, ) -> Series: """ @@ -997,14 +892,6 @@ def take( ---------- indices : array-like An array of ints indicating which positions to take in each group. - axis : {0 or 'index', 1 or 'columns', None}, default 0 - The axis on which to select elements. ``0`` means that we are - selecting rows, ``1`` means that we are selecting columns. - For `SeriesGroupBy` this parameter is unused and defaults to 0. - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. **kwargs For compatibility with :meth:`numpy.take`. 
Has no effect on the @@ -1060,12 +947,11 @@ def take( 1 monkey Name: name, dtype: object """ - result = self._op_via_apply("take", indices=indices, axis=axis, **kwargs) + result = self._op_via_apply("take", indices=indices, axis=0, **kwargs) return result def skew( self, - axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True, numeric_only: bool = False, **kwargs, @@ -1077,14 +963,6 @@ def skew( Parameters ---------- - axis : {0 or 'index', 1 or 'columns', None}, default 0 - Axis for the function to be applied on. - This parameter is only for compatibility with DataFrame and is unused. - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. - skipna : bool, default True Exclude NA/null values when computing the result. @@ -1126,18 +1004,6 @@ def skew( Parrot 1.457863 Name: Max Speed, dtype: float64 """ - if axis is lib.no_default: - axis = 0 - - if axis != 0: - result = self._op_via_apply( - "skew", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - return result def alt(obj): # This should not be reached since the cython path should raise @@ -1177,16 +1043,12 @@ def nsmallest( return result @doc(Series.idxmin.__doc__) - def idxmin( - self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True - ) -> Series: - return self._idxmax_idxmin("idxmin", axis=axis, skipna=skipna) + def idxmin(self, skipna: bool = True) -> Series: + return self._idxmax_idxmin("idxmin", skipna=skipna) @doc(Series.idxmax.__doc__) - def idxmax( - self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True - ) -> Series: - return self._idxmax_idxmin("idxmax", axis=axis, skipna=skipna) + def idxmax(self, skipna: bool = True) -> Series: + return self._idxmax_idxmin("idxmax", skipna=skipna) @doc(Series.corr.__doc__) def corr( @@ -2101,27 +1963,16 @@ def nunique(self, dropna: bool = True) -> DataFrame: def idxmax( self, - axis: Axis | None | lib.NoDefault = 
lib.no_default, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: """ - Return index of first occurrence of maximum over requested axis. + Return index of first occurrence of maximum in each group. NA/null values are excluded. Parameters ---------- - axis : {{0 or 'index', 1 or 'columns'}}, default None - The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. - If axis is not provided, grouper's axis is used. - - .. versionchanged:: 2.0.0 - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. - skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. @@ -2133,7 +1984,7 @@ def idxmax( Returns ------- Series - Indexes of maxima along the specified axis. + Indexes of maxima in each group. Raises ------ @@ -2168,42 +2019,21 @@ def idxmax( consumption Wheat Products co2_emissions Beef dtype: object - - To return the index for the maximum value in each row, use ``axis="columns"``. - - >>> df.idxmax(axis="columns") - Pork co2_emissions - Wheat Products consumption - Beef co2_emissions - dtype: object """ - return self._idxmax_idxmin( - "idxmax", axis=axis, numeric_only=numeric_only, skipna=skipna - ) + return self._idxmax_idxmin("idxmax", numeric_only=numeric_only, skipna=skipna) def idxmin( self, - axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool = True, numeric_only: bool = False, ) -> DataFrame: """ - Return index of first occurrence of minimum over requested axis. + Return index of first occurrence of minimum in each group. NA/null values are excluded. Parameters ---------- - axis : {{0 or 'index', 1 or 'columns'}}, default None - The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. - If axis is not provided, grouper's axis is used. - - .. versionchanged:: 2.0.0 - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. 
Otherwise - the axis keyword is not necessary. - skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. @@ -2215,7 +2045,7 @@ def idxmin( Returns ------- Series - Indexes of minima along the specified axis. + Indexes of minima in each group. Raises ------ @@ -2250,18 +2080,8 @@ def idxmin( consumption Pork co2_emissions Wheat Products dtype: object - - To return the index for the minimum value in each row, use ``axis="columns"``. - - >>> df.idxmin(axis="columns") - Pork consumption - Wheat Products co2_emissions - Beef consumption - dtype: object """ - return self._idxmax_idxmin( - "idxmin", axis=axis, numeric_only=numeric_only, skipna=skipna - ) + return self._idxmax_idxmin("idxmin", numeric_only=numeric_only, skipna=skipna) boxplot = boxplot_frame_groupby @@ -2378,156 +2198,9 @@ def value_counts( """ return self._value_counts(subset, normalize, sort, ascending, dropna) - def fillna( - self, - value: Hashable | Mapping | Series | DataFrame | None = None, - method: FillnaOptions | None = None, - axis: Axis | None | lib.NoDefault = lib.no_default, - inplace: bool = False, - limit: int | None = None, - downcast=lib.no_default, - ) -> DataFrame | None: - """ - Fill NA/NaN values using the specified method within groups. - - .. deprecated:: 2.2.0 - This method is deprecated and will be removed in a future version. - Use the :meth:`.DataFrameGroupBy.ffill` or :meth:`.DataFrameGroupBy.bfill` - for forward or backward filling instead. If you want to fill with a - single value, use :meth:`DataFrame.fillna` instead. - - Parameters - ---------- - value : scalar, dict, Series, or DataFrame - Value to use to fill holes (e.g. 0), alternately a - dict/Series/DataFrame of values specifying which value to use for - each index (for a Series) or column (for a DataFrame). Values not - in the dict/Series/DataFrame will not be filled. This value cannot - be a list. 
Users wanting to use the ``value`` argument and not ``method`` - should prefer :meth:`.DataFrame.fillna` as this - will produce the same result and be more performant. - method : {{'bfill', 'ffill', None}}, default None - Method to use for filling holes. ``'ffill'`` will propagate - the last valid observation forward within a group. - ``'bfill'`` will use next valid observation to fill the gap. - axis : {0 or 'index', 1 or 'columns'} - Axis along which to fill missing values. When the :class:`DataFrameGroupBy` - ``axis`` argument is ``0``, using ``axis=1`` here will produce - the same results as :meth:`.DataFrame.fillna`. When the - :class:`DataFrameGroupBy` ``axis`` argument is ``1``, using ``axis=0`` - or ``axis=1`` here will produce the same results. - inplace : bool, default False - Broken. Do not set to True. - limit : int, default None - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill within a group. In other words, - if there is a gap with more than this number of consecutive NaNs, - it will only be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. Must be greater than 0 if not None. - downcast : dict, default is None - A dict of item->dtype of what to downcast if possible, - or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible). - - Returns - ------- - DataFrame - Object with missing values filled. - - See Also - -------- - ffill : Forward fill values within a group. - bfill : Backward fill values within a group. - - Examples - -------- - >>> df = pd.DataFrame( - ... { - ... "key": [0, 0, 1, 1, 1], - ... "A": [np.nan, 2, np.nan, 3, np.nan], - ... "B": [2, 3, np.nan, np.nan, np.nan], - ... "C": [np.nan, np.nan, 2, np.nan, np.nan], - ... } - ... 
) - >>> df - key A B C - 0 0 NaN 2.0 NaN - 1 0 2.0 3.0 NaN - 2 1 NaN NaN 2.0 - 3 1 3.0 NaN NaN - 4 1 NaN NaN NaN - - Propagate non-null values forward or backward within each group along columns. - - >>> df.groupby("key").fillna(method="ffill") - A B C - 0 NaN 2.0 NaN - 1 2.0 3.0 NaN - 2 NaN NaN 2.0 - 3 3.0 NaN 2.0 - 4 3.0 NaN 2.0 - - >>> df.groupby("key").fillna(method="bfill") - A B C - 0 2.0 2.0 NaN - 1 2.0 3.0 NaN - 2 3.0 NaN 2.0 - 3 3.0 NaN NaN - 4 NaN NaN NaN - - Propagate non-null values forward or backward within each group along rows. - - >>> df.T.groupby(np.array([0, 0, 1, 1])).fillna(method="ffill").T - key A B C - 0 0.0 0.0 2.0 2.0 - 1 0.0 2.0 3.0 3.0 - 2 1.0 1.0 NaN 2.0 - 3 1.0 3.0 NaN NaN - 4 1.0 1.0 NaN NaN - - >>> df.T.groupby(np.array([0, 0, 1, 1])).fillna(method="bfill").T - key A B C - 0 0.0 NaN 2.0 NaN - 1 0.0 2.0 3.0 NaN - 2 1.0 NaN 2.0 2.0 - 3 1.0 3.0 NaN NaN - 4 1.0 NaN NaN NaN - - Only replace the first NaN element within a group along rows. - - >>> df.groupby("key").fillna(method="ffill", limit=1) - A B C - 0 NaN 2.0 NaN - 1 2.0 3.0 NaN - 2 NaN NaN 2.0 - 3 3.0 NaN 2.0 - 4 3.0 NaN NaN - """ - warnings.warn( - f"{type(self).__name__}.fillna is deprecated and " - "will be removed in a future version. Use obj.ffill() or obj.bfill() " - "for forward or backward filling instead. If you want to fill with a " - f"single value, use {type(self.obj).__name__}.fillna instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - - result = self._op_via_apply( - "fillna", - value=value, - method=method, - axis=axis, - inplace=inplace, - limit=limit, - downcast=downcast, - ) - return result - def take( self, indices: TakeIndexer, - axis: Axis | None | lib.NoDefault = lib.no_default, **kwargs, ) -> DataFrame: """ @@ -2545,13 +2218,6 @@ def take( ---------- indices : array-like An array of ints indicating which positions to take. - axis : {0 or 'index', 1 or 'columns', None}, default 0 - The axis on which to select elements. 
``0`` means that we are - selecting rows, ``1`` means that we are selecting columns. - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. **kwargs For compatibility with :meth:`numpy.take`. Has no effect on the @@ -2610,8 +2276,6 @@ def take( 2 1 monkey mammal NaN 2 lion mammal 80.5 - Take elements at indices 1 and 2 along the axis 1 (column selection). - We may take elements using negative integers for positive indices, starting from the end of the object, just like with Python lists. @@ -2622,12 +2286,11 @@ def take( 2 0 rabbit mammal 15.0 1 monkey mammal NaN """ - result = self._op_via_apply("take", indices=indices, axis=axis, **kwargs) + result = self._op_via_apply("take", indices=indices, axis=0, **kwargs) return result def skew( self, - axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool = True, numeric_only: bool = False, **kwargs, @@ -2639,17 +2302,6 @@ def skew( Parameters ---------- - axis : {0 or 'index', 1 or 'columns', None}, default 0 - Axis for the function to be applied on. - - Specifying ``axis=None`` will apply the aggregation across both axes. - - .. versionadded:: 2.0.0 - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. - skipna : bool, default True Exclude NA/null values when computing the result. 
@@ -2699,18 +2351,6 @@ def skew( bird NaN mammal 1.669046 """ - if axis is lib.no_default: - axis = 0 - - if axis != 0: - result = self._op_via_apply( - "skew", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - return result def alt(obj): # This should not be reached since the cython path should raise @@ -2812,7 +2452,6 @@ def dtypes(self) -> Series: def corrwith( self, other: DataFrame | Series, - axis: Axis | lib.NoDefault = lib.no_default, drop: bool = False, method: CorrelationMethod = "pearson", numeric_only: bool = False, @@ -2820,7 +2459,6 @@ def corrwith( result = self._op_via_apply( "corrwith", other=other, - axis=axis, drop=drop, method=method, numeric_only=numeric_only, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1ca2b4ff511ca..1f0e0567446c6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1399,25 +1399,10 @@ def _op_via_apply(self, name: str, *args, **kwargs): f = getattr(type(self._obj_with_exclusions), name) sig = inspect.signature(f) - if "axis" in kwargs and kwargs["axis"] is not lib.no_default: - axis = self.obj._get_axis_number(kwargs["axis"]) - self._deprecate_axis(axis, name) - elif "axis" in kwargs: - # exclude skew here because that was already defaulting to lib.no_default - # before this deprecation was instituted - if name == "skew": - pass - elif name == "fillna": - # maintain the behavior from before the deprecation - kwargs["axis"] = None - else: - kwargs["axis"] = 0 - # a little trickery for aggregation functions that need an axis # argument if "axis" in sig.parameters: - if kwargs.get("axis", None) is None or kwargs.get("axis") is lib.no_default: - kwargs["axis"] = self.axis + kwargs["axis"] = self.axis def curried(x): return f(x, *args, **kwargs) @@ -4721,7 +4706,6 @@ def rank( ascending: bool = True, na_option: str = "keep", pct: bool = False, - axis: AxisInt | lib.NoDefault = lib.no_default, ) -> NDFrameT: """ Provide the rank of 
values within each group. @@ -4742,12 +4726,6 @@ def rank( * bottom: smallest rank if descending. pct : bool, default False Compute percentage rank of data within each group. - axis : int, default 0 - The axis of the object over which to compute the rank. - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. Returns ------- @@ -4792,40 +4770,24 @@ def rank( msg = "na_option must be one of 'keep', 'top', or 'bottom'" raise ValueError(msg) - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "rank") - else: - axis = 0 - kwargs = { "ties_method": method, "ascending": ascending, "na_option": na_option, "pct": pct, } - if axis != 0: - # DataFrame uses different keyword name - kwargs["method"] = kwargs.pop("ties_method") - f = lambda x: x.rank(axis=axis, numeric_only=False, **kwargs) - result = self._python_apply_general( - f, self._selected_obj, is_transform=True - ) - return result return self._cython_transform( "rank", numeric_only=False, - axis=axis, + axis=0, **kwargs, ) @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def cumprod( - self, axis: Axis | lib.NoDefault = lib.no_default, *args, **kwargs - ) -> NDFrameT: + def cumprod(self, *args, **kwargs) -> NDFrameT: """ Cumulative product for each group. 
@@ -4869,24 +4831,12 @@ def cumprod( bull 6 9 """ nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "cumprod") - else: - axis = 0 - - if axis != 0: - f = lambda x: x.cumprod(axis=axis, **kwargs) - return self._python_apply_general(f, self._selected_obj, is_transform=True) - return self._cython_transform("cumprod", **kwargs) @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def cumsum( - self, axis: Axis | lib.NoDefault = lib.no_default, *args, **kwargs - ) -> NDFrameT: + def cumsum(self, *args, **kwargs) -> NDFrameT: """ Cumulative sum for each group. @@ -4930,16 +4880,6 @@ def cumsum( lion 6 9 """ nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "cumsum") - else: - axis = 0 - - if axis != 0: - f = lambda x: x.cumsum(axis=axis, **kwargs) - return self._python_apply_general(f, self._selected_obj, is_transform=True) - return self._cython_transform("cumsum", **kwargs) @final @@ -4947,7 +4887,6 @@ def cumsum( @Substitution(see_also=_common_see_also) def cummin( self, - axis: AxisInt | lib.NoDefault = lib.no_default, numeric_only: bool = False, **kwargs, ) -> NDFrameT: @@ -5000,19 +4939,6 @@ def cummin( turtle 6 9 """ skipna = kwargs.get("skipna", True) - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "cummin") - else: - axis = 0 - - if axis != 0: - f = lambda x: np.minimum.accumulate(x, axis) - obj = self._selected_obj - if numeric_only: - obj = obj._get_numeric_data() - return self._python_apply_general(f, obj, is_transform=True) - return self._cython_transform( "cummin", numeric_only=numeric_only, skipna=skipna ) @@ -5022,7 +4948,6 @@ def cummin( @Substitution(see_also=_common_see_also) def cummax( self, - axis: AxisInt | 
lib.NoDefault = lib.no_default, numeric_only: bool = False, **kwargs, ) -> NDFrameT: @@ -5075,19 +5000,6 @@ def cummax( bull 6 9 """ skipna = kwargs.get("skipna", True) - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "cummax") - else: - axis = 0 - - if axis != 0: - f = lambda x: np.maximum.accumulate(x, axis) - obj = self._selected_obj - if numeric_only: - obj = obj._get_numeric_data() - return self._python_apply_general(f, obj, is_transform=True) - return self._cython_transform( "cummax", numeric_only=numeric_only, skipna=skipna ) @@ -5098,7 +5010,6 @@ def shift( self, periods: int | Sequence[int] = 1, freq=None, - axis: Axis | lib.NoDefault = lib.no_default, fill_value=lib.no_default, suffix: str | None = None, ): @@ -5114,13 +5025,6 @@ def shift( each period. freq : str, optional Frequency string. - axis : axis to shift, default 0 - Shift direction. - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. - fill_value : optional The scalar value to use for newly introduced missing values. @@ -5178,17 +5082,7 @@ def shift( catfish NaN NaN goldfish 5.0 8.0 """ - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "shift") - else: - axis = 0 - if is_list_like(periods): - if axis == 1: - raise ValueError( - "If `periods` contains multiple shifts, `axis` cannot be 1." - ) periods = cast(Sequence, periods) if len(periods) == 0: raise ValueError("If `periods` is an iterable, it cannot be empty.") @@ -5212,11 +5106,11 @@ def shift( f"Periods must be integer, but {period} is {type(period)}." 
) period = cast(int, period) - if freq is not None or axis != 0: + if freq is not None: f = lambda x: x.shift( period, # pylint: disable=cell-var-from-loop freq, - axis, + 0, # axis fill_value, ) shifted = self._python_apply_general( @@ -5256,7 +5150,8 @@ def shift( @Substitution(name="groupby") @Substitution(see_also=_common_see_also) def diff( - self, periods: int = 1, axis: AxisInt | lib.NoDefault = lib.no_default + self, + periods: int = 1, ) -> NDFrameT: """ First discrete difference of element. @@ -5268,12 +5163,6 @@ def diff( ---------- periods : int, default 1 Periods to shift for calculating difference, accepts negative values. - axis : axis to shift, default 0 - Take difference over rows (0) or columns (1). - - .. deprecated:: 2.1.0 - For axis=1, operate on the underlying object instead. Otherwise - the axis keyword is not necessary. Returns ------- @@ -5327,15 +5216,6 @@ def diff( mouse 1.0 -2.0 mouse -5.0 -1.0 """ - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "diff") - else: - axis = 0 - - if axis != 0: - return self.apply(lambda x: x.diff(periods=periods, axis=axis)) - obj = self._obj_with_exclusions shifted = self.shift(periods=periods) @@ -5361,7 +5241,6 @@ def pct_change( fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, limit: int | None | lib.NoDefault = lib.no_default, freq=None, - axis: Axis | lib.NoDefault = lib.no_default, ): """ Calculate pct_change of each value to previous entry in group. 
@@ -5437,21 +5316,15 @@ def pct_change( if limit is lib.no_default: limit = None - if axis is not lib.no_default: - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, "pct_change") - else: - axis = 0 - # TODO(GH#23918): Remove this conditional for SeriesGroupBy when # GH#23918 is fixed - if freq is not None or axis != 0: + if freq is not None: f = lambda x: x.pct_change( periods=periods, fill_method=fill_method, limit=limit, freq=freq, - axis=axis, + axis=0, ) return self._python_apply_general(f, self._selected_obj, is_transform=True) @@ -5803,7 +5676,6 @@ def _idxmax_idxmin( self, how: Literal["idxmax", "idxmin"], ignore_unobserved: bool = False, - axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool = True, numeric_only: bool = False, ) -> NDFrameT: @@ -5813,9 +5685,6 @@ def _idxmax_idxmin( ---------- how : {'idxmin', 'idxmax'} Whether to compute idxmin or idxmax. - axis : {{0 or 'index', 1 or 'columns'}}, default None - The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise. - If axis is not provided, grouper's axis is used. numeric_only : bool, default False Include only float, int, boolean columns. skipna : bool, default True @@ -5830,14 +5699,6 @@ def _idxmax_idxmin( Series or DataFrame idxmax or idxmin for the groupby operation. 
""" - if axis is not lib.no_default: - if axis is None: - axis = self.axis - axis = self.obj._get_axis_number(axis) - self._deprecate_axis(axis, how) - else: - axis = self.axis - if not self.observed and any( ping._passed_categorical for ping in self._grouper.groupings ): @@ -5877,27 +5738,6 @@ def _idxmax_idxmin( stacklevel=find_stack_level(), ) - if axis == 1: - try: - - def func(df): - method = getattr(df, how) - return method(axis=axis, skipna=skipna, numeric_only=numeric_only) - - func.__name__ = how - result = self._python_apply_general( - func, self._obj_with_exclusions, not_indexed_same=True - ) - except ValueError as err: - name = "argmax" if how == "idxmax" else "argmin" - if f"attempt to get {name} of an empty sequence" in str(err): - raise ValueError( - f"Can't get {how} of an empty group due to unobserved " - "categories. Specify observed=True in groupby instead." - ) from None - raise - return result - result = self._agg_general( numeric_only=numeric_only, min_count=1, diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 86f03b04fddb3..0e86f95a93091 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -263,18 +263,9 @@ def test_std_masked_dtype(any_numeric_ea_dtype): def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func): gb = df.groupby(level=0) - warn_msg = f"DataFrameGroupBy.{reduction_func} with axis=1 is deprecated" - if reduction_func in ("idxmax", "idxmin"): - error = TypeError - msg = "'[<>]' not supported between instances of 'float' and 'str'" - warn = FutureWarning - else: - error = ValueError - msg = f"Operation {reduction_func} does not support axis=1" - warn = None - with pytest.raises(error, match=msg): - with tm.assert_produces_warning(warn, match=warn_msg): - gb.agg(reduction_func, axis=1) + msg = f"Operation {reduction_func} does not support axis=1" + with pytest.raises(ValueError, match=msg): + 
gb.agg(reduction_func, axis=1) @pytest.mark.parametrize( diff --git a/pandas/tests/groupby/methods/test_corrwith.py b/pandas/tests/groupby/methods/test_corrwith.py deleted file mode 100644 index 53e8bdc4534dc..0000000000000 --- a/pandas/tests/groupby/methods/test_corrwith.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np - -from pandas import ( - DataFrame, - Index, - Series, -) -import pandas._testing as tm - - -def test_corrwith_with_1_axis(): - # GH 47723 - df = DataFrame({"a": [1, 1, 2], "b": [3, 7, 4]}) - gb = df.groupby("a") - - msg = "DataFrameGroupBy.corrwith with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.corrwith(df, axis=1) - index = Index( - data=[(1, 0), (1, 1), (1, 2), (2, 2), (2, 0), (2, 1)], - name=("a", None), - ) - expected = Series([np.nan] * 6, index=index) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/methods/test_rank.py b/pandas/tests/groupby/methods/test_rank.py index 18869033d05c6..1fb878cbc1111 100644 --- a/pandas/tests/groupby/methods/test_rank.py +++ b/pandas/tests/groupby/methods/test_rank.py @@ -605,80 +605,6 @@ def test_rank_pct_equal_values_on_group_transition(use_nan): tm.assert_series_equal(result, expected) -def test_rank_multiindex(): - # GH27721 - df = concat( - { - "a": DataFrame({"col1": [3, 4], "col2": [1, 2]}), - "b": DataFrame({"col3": [5, 6], "col4": [7, 8]}), - }, - axis=1, - ) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=0, axis=1) - msg = "DataFrameGroupBy.rank with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.rank(axis=1) - - expected = concat( - [ - df["a"].rank(axis=1), - df["b"].rank(axis=1), - ], - axis=1, - keys=["a", "b"], - ) - tm.assert_frame_equal(result, expected) - - -def test_groupby_axis0_rank_axis1(): - # GH#41320 - df = DataFrame( - {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: 
[1.5, 3.5, 5.5, 7.5]}, - index=["a", "a", "b", "b"], - ) - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=0, axis=0) - - msg = "DataFrameGroupBy.rank with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = gb.rank(axis=1) - - # This should match what we get when "manually" operating group-by-group - expected = concat([df.loc["a"].rank(axis=1), df.loc["b"].rank(axis=1)], axis=0) - tm.assert_frame_equal(res, expected) - - # check that we haven't accidentally written a case that coincidentally - # matches rank(axis=0) - msg = "The 'axis' keyword in DataFrameGroupBy.rank" - with tm.assert_produces_warning(FutureWarning, match=msg): - alt = gb.rank(axis=0) - assert not alt.equals(expected) - - -def test_groupby_axis0_cummax_axis1(): - # case where groupby axis is 0 and axis keyword in transform is 1 - - # df has mixed dtype -> multiple blocks - df = DataFrame( - {0: [1, 3, 5, 7], 1: [2, 4, 6, 8], 2: [1.5, 3.5, 5.5, 7.5]}, - index=["a", "a", "b", "b"], - ) - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=0, axis=0) - - msg = "DataFrameGroupBy.cummax with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - cmax = gb.cummax(axis=1) - expected = df[[0, 1]].astype(np.float64) - expected[2] = expected[1] - tm.assert_frame_equal(cmax, expected) - - def test_non_unique_index(): # GH 16577 df = DataFrame( diff --git a/pandas/tests/groupby/test_api.py b/pandas/tests/groupby/test_api.py index 5c5982954de2f..24a02bb6b2b91 100644 --- a/pandas/tests/groupby/test_api.py +++ b/pandas/tests/groupby/test_api.py @@ -68,7 +68,6 @@ def test_tab_completion(multiindex_dataframe_random_data): "tail", "resample", "cummin", - "fillna", "cumsum", "cumcount", "ngroup", @@ -191,11 +190,12 @@ def 
test_frame_consistency(groupby_func): exclude_expected = {"skipna"} elif groupby_func in ("pct_change",): exclude_expected = {"kwargs"} - exclude_result = {"axis"} elif groupby_func in ("rank",): exclude_expected = {"numeric_only"} elif groupby_func in ("quantile",): exclude_expected = {"method", "axis"} + if groupby_func not in ["pct_change", "size"]: + exclude_expected |= {"axis"} # Ensure excluded arguments are actually in the signatures assert result & exclude_result == exclude_result @@ -229,8 +229,6 @@ def test_series_consistency(request, groupby_func): exclude_expected, exclude_result = set(), set() if groupby_func in ("any", "all"): exclude_expected = {"kwargs", "bool_only", "axis"} - elif groupby_func in ("diff",): - exclude_result = {"axis"} elif groupby_func in ("max", "min"): exclude_expected = {"axis", "kwargs", "skipna"} exclude_result = {"min_count", "engine", "engine_kwargs"} @@ -248,13 +246,21 @@ def test_series_consistency(request, groupby_func): exclude_expected = {"skipna"} elif groupby_func in ("pct_change",): exclude_expected = {"kwargs"} - exclude_result = {"axis"} elif groupby_func in ("rank",): exclude_expected = {"numeric_only"} elif groupby_func in ("idxmin", "idxmax"): exclude_expected = {"args", "kwargs"} elif groupby_func in ("quantile",): exclude_result = {"numeric_only"} + if groupby_func not in [ + "diff", + "pct_change", + "count", + "nunique", + "quantile", + "size", + ]: + exclude_expected |= {"axis"} # Ensure excluded arguments are actually in the signatures assert result & exclude_result == exclude_result diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py index 1bdbef6d50c4c..28dcb38d173f2 100644 --- a/pandas/tests/groupby/test_cumulative.py +++ b/pandas/tests/groupby/test_cumulative.py @@ -304,16 +304,3 @@ def test_cython_api2(): # GH 5755 - cumsum is a transformer and should ignore as_index result = df.groupby("A", as_index=False).cumsum() tm.assert_frame_equal(result, expected) - - 
# GH 13994 - msg = "DataFrameGroupBy.cumsum with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").cumsum(axis=1) - expected = df.cumsum(axis=1) - tm.assert_frame_equal(result, expected) - - msg = "DataFrameGroupBy.cumprod with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").cumprod(axis=1) - expected = df.cumprod(axis=1) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7f7d9544c3891..399cebb0d3706 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2906,9 +2906,6 @@ def test_groupby_selection_other_methods(df): g_exp = df[["C"]].groupby(df["A"]) # methods which aren't just .foo() - warn_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0)) msg = "DataFrameGroupBy.dtypes is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): tm.assert_frame_equal(g.dtypes, g_exp.dtypes) diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py index 6c029c10817d9..2b590c50371e9 100644 --- a/pandas/tests/groupby/test_missing.py +++ b/pandas/tests/groupby/test_missing.py @@ -5,7 +5,6 @@ from pandas import ( DataFrame, Index, - date_range, ) import pandas._testing as tm @@ -36,78 +35,6 @@ def test_groupby_fill_duplicate_column_names(func): tm.assert_frame_equal(result, expected) -def test_ffill_missing_arguments(): - # GH 14955 - df = DataFrame({"a": [1, 2], "b": [1, 1]}) - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - with pytest.raises(ValueError, match="Must specify a fill"): - df.groupby("b").fillna() - - -@pytest.mark.parametrize( - "method, expected", [("ffill", [None, "a", "a"]), ("bfill", ["a", "a", 
None])] -) -def test_fillna_with_string_dtype(method, expected): - # GH 40250 - df = DataFrame({"a": pd.array([None, "a", None], dtype="string"), "b": [0, 0, 0]}) - grp = df.groupby("b") - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grp.fillna(method=method) - expected = DataFrame({"a": pd.array(expected, dtype="string")}) - tm.assert_frame_equal(result, expected) - - -def test_fill_consistency(): - # GH9221 - # pass thru keyword arguments to the generated wrapper - # are set if the passed kw is None (only) - df = DataFrame( - index=pd.MultiIndex.from_product( - [["value1", "value2"], date_range("2014-01-01", "2014-01-06")] - ), - columns=Index(["1", "2"], name="id"), - ) - df["1"] = [ - np.nan, - 1, - np.nan, - np.nan, - 11, - np.nan, - np.nan, - 2, - np.nan, - np.nan, - 22, - np.nan, - ] - df["2"] = [ - np.nan, - 3, - np.nan, - np.nan, - 33, - np.nan, - np.nan, - 4, - np.nan, - np.nan, - 44, - np.nan, - ] - - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.groupby(level=0, axis=0).fillna(method="ffill") - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("method", ["ffill", "bfill"]) @pytest.mark.parametrize("has_nan_group", [True, False]) def test_ffill_handles_nan_groups(dropna, method, has_nan_group): diff --git a/pandas/tests/groupby/test_numeric_only.py b/pandas/tests/groupby/test_numeric_only.py index ff4685b1e412d..1b435fd55d05e 100644 --- a/pandas/tests/groupby/test_numeric_only.py +++ b/pandas/tests/groupby/test_numeric_only.py @@ -1,6 +1,5 @@ import re -import numpy as np import pytest from pandas._libs import lib @@ -207,94 +206,6 @@ def _check(self, df, method, expected_columns, 
expected_columns_numeric): tm.assert_index_equal(result.columns, expected_columns) -@pytest.mark.parametrize("numeric_only", [True, False, None]) -def test_axis1_numeric_only(request, groupby_func, numeric_only, using_infer_string): - if groupby_func in ("idxmax", "idxmin"): - pytest.skip("idxmax and idx_min tested in test_idxmin_idxmax_axis1") - if groupby_func in ("corrwith", "skew"): - msg = "GH#47723 groupby.corrwith and skew do not correctly implement axis=1" - request.applymarker(pytest.mark.xfail(reason=msg)) - - df = DataFrame( - np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] - ) - df["E"] = "x" - groups = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] - gb = df.groupby(groups) - method = getattr(gb, groupby_func) - args = get_groupby_method_args(groupby_func, df) - kwargs = {"axis": 1} - if numeric_only is not None: - # when numeric_only is None we don't pass any argument - kwargs["numeric_only"] = numeric_only - - # Functions without numeric_only and axis args - no_args = ("cumprod", "cumsum", "diff", "fillna", "pct_change", "rank", "shift") - # Functions with axis args - has_axis = ( - "cumprod", - "cumsum", - "diff", - "pct_change", - "rank", - "shift", - "cummax", - "cummin", - "idxmin", - "idxmax", - "fillna", - ) - warn_msg = f"DataFrameGroupBy.{groupby_func} with axis=1 is deprecated" - if numeric_only is not None and groupby_func in no_args: - msg = "got an unexpected keyword argument 'numeric_only'" - if groupby_func in ["cumprod", "cumsum"]: - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - method(*args, **kwargs) - else: - with pytest.raises(TypeError, match=msg): - method(*args, **kwargs) - elif groupby_func not in has_axis: - msg = "got an unexpected keyword argument 'axis'" - with pytest.raises(TypeError, match=msg): - method(*args, **kwargs) - # fillna and shift are successful even on object dtypes - elif (numeric_only is None or not numeric_only) and groupby_func 
not in ( - "fillna", - "shift", - ): - msgs = ( - # cummax, cummin, rank - "not supported between instances of", - # cumprod - "can't multiply sequence by non-int of type 'float'", - # cumsum, diff, pct_change - "unsupported operand type", - "has no kernel", - ) - if using_infer_string: - import pyarrow as pa - - errs = (TypeError, pa.lib.ArrowNotImplementedError) - else: - errs = TypeError - with pytest.raises(errs, match=f"({'|'.join(msgs)})"): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - method(*args, **kwargs) - else: - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - result = method(*args, **kwargs) - - df_expected = df.drop(columns="E").T if numeric_only else df.T - expected = getattr(df_expected, groupby_func)(*args).T - if groupby_func == "shift" and not numeric_only: - # shift with axis=1 leaves the leftmost column as numeric - # but transposing for expected gives us object dtype - expected = expected.astype(float) - - tm.assert_equal(result, expected) - - @pytest.mark.parametrize( "kernel, has_arg", [ @@ -310,7 +221,6 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only, using_infer_str ("cumsum", True), ("diff", False), ("ffill", False), - ("fillna", False), ("first", True), ("idxmax", True), ("idxmin", True), diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 738711019b5fd..ee5c61794e96d 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -698,13 +698,3 @@ def test_groupby_raises_category_on_category( else: warn_msg = "" _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg) - - -def test_subsetting_columns_axis_1_raises(): - # GH 35443 - df = DataFrame({"a": [1], "b": [2], "c": [3]}) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby("a", axis=1) - with pytest.raises(ValueError, match="Cannot subset columns when using 
axis=1"): - gb["b"] diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index bd188c729846c..08ce41edfb784 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -420,32 +420,6 @@ def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how): tm.assert_frame_equal(result, expected) -def test_idxmin_idxmax_axis1(): - df = DataFrame( - np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"] - ) - df["A"] = [1, 2, 3, 1, 2, 3, 1, 2, 3, 4] - - gb = df.groupby("A") - - warn_msg = "DataFrameGroupBy.idxmax with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - res = gb.idxmax(axis=1) - - alt = df.iloc[:, 1:].idxmax(axis=1) - indexer = res.index.get_level_values(1) - - tm.assert_series_equal(alt[indexer], res.droplevel("A")) - - df["E"] = date_range("2016-01-01", periods=10) - gb2 = df.groupby("A") - - msg = "'>' not supported between instances of 'Timestamp' and 'float'" - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - gb2.idxmax(axis=1) - - def test_groupby_mean_no_overflow(): # Regression test for (#22487) df = DataFrame( diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 40732b32f0111..3bccacf3dec6f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -398,33 +398,12 @@ def test_dispatch_transform(tsframe): grouped = df.groupby(lambda x: x.month) - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - filled = grouped.fillna(method="pad") - msg = "Series.fillna with 'method' is deprecated" - fillit = lambda x: x.fillna(method="pad") - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.groupby(lambda x: x.month).transform(fillit) + filled 
= grouped.ffill() + fillit = lambda x: x.ffill() + expected = df.groupby(lambda x: x.month).transform(fillit) tm.assert_frame_equal(filled, expected) -def test_transform_fillna_null(): - df = DataFrame( - { - "price": [10, 10, 20, 20, 30, 30], - "color": [10, 10, 20, 20, 30, 30], - "cost": (100, 200, 300, 400, 500, 600), - } - ) - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - with pytest.raises(ValueError, match="Must specify a fill 'value' or 'method'"): - df.groupby(["price"]).transform("fillna") - with tm.assert_produces_warning(FutureWarning, match=msg): - with pytest.raises(ValueError, match="Must specify a fill 'value' or 'method'"): - df.groupby(["price"]).fillna() - - def test_transform_transformation_func(transformation_func): # GH 30918 df = DataFrame( @@ -1690,11 +1669,10 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only): # GH#55268 - ensure *args are passed through when calling transform df = DataFrame({"a": [1, 1, 1, 2], "b": [3.0, 4.0, np.nan, 6.0], "c": list("abcd")}) gb = df.groupby("a") - msg = f"'axis' keyword in DataFrameGroupBy.{how} is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = gb.transform(how, 0, skipna, numeric_only) warn = None if skipna else FutureWarning msg = f"The behavior of DataFrameGroupBy.{how} with .* any-NA and skipna=False" + with tm.assert_produces_warning(warn, match=msg): + result = gb.transform(how, skipna, numeric_only) with tm.assert_produces_warning(warn, match=msg): expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only) tm.assert_frame_equal(result, expected)