From b4df373cb3e3969cc1dfd1005414b020fff2d964 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 27 Jan 2024 21:49:40 -0500 Subject: [PATCH 1/3] DEPR: Enforce deprecation of groupby(..., axis=1) --- doc/source/user_guide/groupby.rst | 2 +- doc/source/user_guide/window.rst | 6 +- doc/source/whatsnew/v3.0.0.rst | 2 + pandas/core/frame.py | 21 -- pandas/core/generic.py | 101 +-------- pandas/core/groupby/generic.py | 102 +++------ pandas/core/groupby/groupby.py | 204 +++++------------- pandas/core/groupby/grouper.py | 52 ++--- pandas/core/groupby/ops.py | 13 +- pandas/core/resample.py | 33 +-- pandas/core/reshape/merge.py | 2 +- pandas/core/reshape/pivot.py | 2 +- pandas/core/series.py | 3 - pandas/core/shared_docs.py | 9 - pandas/core/window/ewm.py | 24 +-- pandas/core/window/expanding.py | 12 +- pandas/core/window/numba_.py | 3 +- pandas/core/window/online.py | 7 +- pandas/core/window/rolling.py | 47 +--- pandas/plotting/_matplotlib/boxplot.py | 7 +- pandas/tests/apply/test_str.py | 30 +-- .../tests/groupby/aggregate/test_aggregate.py | 94 -------- pandas/tests/groupby/aggregate/test_cython.py | 15 -- pandas/tests/groupby/methods/test_describe.py | 12 -- pandas/tests/groupby/methods/test_nth.py | 73 ------- pandas/tests/groupby/methods/test_quantile.py | 26 --- pandas/tests/groupby/methods/test_size.py | 31 --- .../groupby/methods/test_value_counts.py | 8 - pandas/tests/groupby/test_apply.py | 59 ----- pandas/tests/groupby/test_apply_mutate.py | 63 ------ pandas/tests/groupby/test_categorical.py | 16 -- pandas/tests/groupby/test_filters.py | 13 -- pandas/tests/groupby/test_groupby.py | 163 +------------- pandas/tests/groupby/test_grouping.py | 61 +----- pandas/tests/groupby/test_indexing.py | 14 -- pandas/tests/groupby/test_numba.py | 7 - pandas/tests/groupby/test_reductions.py | 24 +-- pandas/tests/groupby/test_timegrouper.py | 2 +- .../tests/groupby/transform/test_transform.py | 112 +--------- pandas/tests/plotting/test_boxplot_method.py | 19 -- pandas/tests/resample/test_datetime_index.py | 35 --- pandas/tests/resample/test_resample_api.py | 55 +---- pandas/tests/resample/test_time_grouper.py | 4 +- pandas/tests/test_multilevel.py | 31 --- pandas/tests/window/test_api.py | 39 ---- pandas/tests/window/test_apply.py | 10 - pandas/tests/window/test_ewm.py | 50 ----- pandas/tests/window/test_expanding.py | 42 ++-- pandas/tests/window/test_numba.py | 39 ++-- pandas/tests/window/test_rolling.py | 163 +------------- pandas/tests/window/test_timeseries_window.py | 14 +- pandas/tests/window/test_win_type.py | 17 -- 52 files changed, 209 insertions(+), 1784 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index a25e2ed179b80..2a4d7791322e5 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -247,7 +247,7 @@ GroupBy object attributes ~~~~~~~~~~~~~~~~~~~~~~~~~ The ``groups`` attribute is a dictionary whose keys are the computed unique groups -and corresponding values are the axis labels belonging to each group. In the +and corresponding values are the index labels belonging to each group. In the above example we have: .. ipython:: python diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index d997aa119b359..e25c4c2441920 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -79,9 +79,9 @@ which will first group the data by the specified keys and then perform a windowi .. versionadded:: 1.3.0 Some windowing operations also support the ``method='table'`` option in the constructor which -performs the windowing operation over an entire :class:`DataFrame` instead of a single column or row at a time. -This can provide a useful performance benefit for a :class:`DataFrame` with many columns or rows -(with the corresponding ``axis`` argument) or the ability to utilize other columns during the windowing +performs the windowing operation over an entire :class:`DataFrame` instead of a single column at a time. +This can provide a useful performance benefit for a :class:`DataFrame` with many columns +or the ability to utilize other columns during the windowing operation. The ``method='table'`` option can only be used if ``engine='numba'`` is specified in the corresponding method call. diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4accf8be46b9e..528b1ffcd4a55 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,8 +102,10 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) +- Removed ``axis`` argument from :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`51203`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) +- .. --------------------------------------------------------------------------- .. _whatsnew_300.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8a0783dab5214..2524d196265fb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9098,7 +9098,6 @@ def update( def groupby( self, by=None, - axis: Axis | lib.NoDefault = lib.no_default, level: IndexLabel | None = None, as_index: bool = True, sort: bool = True, @@ -9106,25 +9105,6 @@ def groupby( observed: bool | lib.NoDefault = lib.no_default, dropna: bool = True, ) -> DataFrameGroupBy: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - if axis == 1: - warnings.warn( - "DataFrame.groupby with axis=1 is deprecated. Do " - "`frame.T.groupby(...)` without axis instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - "The 'axis' keyword in DataFrame.groupby is deprecated and " - "will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - from pandas.core.groupby.generic import DataFrameGroupBy if level is None and by is None: @@ -9133,7 +9113,6 @@ def groupby( return DataFrameGroupBy( obj=self, keys=by, - axis=axis, level=level, as_index=as_index, sort=sort, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9440ad67256e1..dd39b29b6134a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9399,7 +9399,6 @@ def between_time( def resample( self, rule, - axis: Axis | lib.NoDefault = lib.no_default, closed: Literal["right", "left"] | None = None, label: Literal["right", "left"] | None = None, convention: Literal["start", "end", "s", "e"] | lib.NoDefault = lib.no_default, @@ -9422,13 +9421,6 @@ def resample( ---------- rule : DateOffset, Timedelta or str The offset string or object representing target conversion. - axis : {{0 or 'index', 1 or 'columns'}}, default 0 - Which axis to use for up- or down-sampling. For `Series` this parameter - is unused and defaults to 0. Must be - `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. - - .. deprecated:: 2.0.0 - Use frame.T.resample(...) instead. closed : {{'right', 'left'}}, default None Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', @@ -9740,25 +9732,6 @@ def resample( """ from pandas.core.resample import get_resampler - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - if axis == 1: - warnings.warn( - "DataFrame.resample with axis=1 is deprecated. Do " - "`frame.T.resample(...)` without axis instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.resample is " - "deprecated and will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - if kind is not lib.no_default: # GH#55895 warnings.warn( @@ -9788,7 +9761,6 @@ def resample( freq=rule, label=label, closed=closed, - axis=axis, kind=kind, convention=convention, key=on, @@ -12559,33 +12531,10 @@ def rolling( center: bool_t = False, win_type: str | None = None, on: str | None = None, - axis: Axis | lib.NoDefault = lib.no_default, closed: IntervalClosedType | None = None, step: int | None = None, method: str = "single", ) -> Window | Rolling: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "rolling" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - f"Use obj.T.{name}(...) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - if win_type is not None: return Window( self, @@ -12594,7 +12543,6 @@ def rolling( center=center, win_type=win_type, on=on, - axis=axis, closed=closed, step=step, method=method, @@ -12607,7 +12555,6 @@ def rolling( center=center, win_type=win_type, on=on, - axis=axis, closed=closed, step=step, method=method, @@ -12618,31 +12565,9 @@ def rolling( def expanding( self, min_periods: int = 1, - axis: Axis | lib.NoDefault = lib.no_default, method: Literal["single", "table"] = "single", ) -> Expanding: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "expanding" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - f"Use obj.T.{name}(...) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - return Expanding(self, min_periods=min_periods, axis=axis, method=method) + return Expanding(self, min_periods=min_periods, method=method) @final @doc(ExponentialMovingWindow) @@ -12655,32 +12580,9 @@ def ewm( min_periods: int | None = 0, adjust: bool_t = True, ignore_na: bool_t = False, - axis: Axis | lib.NoDefault = lib.no_default, times: np.ndarray | DataFrame | Series | None = None, method: Literal["single", "table"] = "single", ) -> ExponentialMovingWindow: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "ewm" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - f"Use obj.T.{name}(...) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - return ExponentialMovingWindow( self, com=com, @@ -12690,7 +12592,6 @@ def ewm( min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, - axis=axis, times=times, method=method, ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 351b4bff0162e..c59c0248b8702 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -95,7 +95,6 @@ from pandas._typing import ( ArrayLike, - AxisInt, CorrelationMethod, IndexLabel, Manager, @@ -446,9 +445,7 @@ def _aggregate_named(self, func, *args, **kwargs): result = {} initialized = False - for name, group in self._grouper.get_iterator( - self._obj_with_exclusions, axis=self.axis - ): + for name, group in self._grouper.get_iterator(self._obj_with_exclusions): # needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations object.__setattr__(group, "name", name) @@ -512,16 +509,12 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs ) - def _cython_transform( - self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs - ): - assert axis == 0 # handled by caller - + def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs): obj = self._obj_with_exclusions try: result = self._grouper._cython_operation( - "transform", obj._values, how, axis, **kwargs + "transform", obj._values, how, 0, **kwargs ) except NotImplementedError as err: # e.g. test_groupby_raises_string @@ -544,7 +537,7 @@ def _transform_general( results = [] for name, group in self._grouper.get_iterator( - self._obj_with_exclusions, axis=self.axis + self._obj_with_exclusions, ): # this setattr is needed for test_transform_lambda_with_datetimetz object.__setattr__(group, "name", name) @@ -615,9 +608,7 @@ def true_and_notna(x) -> bool: try: indices = [ self._get_index(name) - for name, group in self._grouper.get_iterator( - self._obj_with_exclusions, axis=self.axis - ) + for name, group in self._grouper.get_iterator(self._obj_with_exclusions) if true_and_notna(group) ] except (ValueError, TypeError) as err: @@ -928,7 +919,7 @@ def take( 0 rabbit mammal 15.0 >>> gb = df["name"].groupby([1, 1, 2, 2, 2]) - Take elements at positions 0 and 1 along the axis 0 in each group (default). + Take elements at rows 0 and 1 in each group. >>> gb.take([0, 1]) 1 4 falcon @@ -947,7 +938,7 @@ def take( 1 monkey Name: name, dtype: object """ - result = self._op_via_apply("take", indices=indices, axis=0, **kwargs) + result = self._op_via_apply("take", indices=indices, **kwargs) return result def skew( @@ -1334,12 +1325,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # can't return early result = self._aggregate_frame(func, *args, **kwargs) - elif self.axis == 1: - # _aggregate_multiple_funcs does not allow self.axis == 1 - # Note: axis == 1 precludes 'not self.as_index', see __init__ - result = self._aggregate_frame(func) - return result - else: # try to treat as if we are passing a list gba = GroupByApply(self, [func], args=(), kwargs={}) @@ -1385,8 +1370,6 @@ def _python_agg_general(self, func, *args, **kwargs): return self._python_apply_general(f, self._selected_obj, is_agg=True) obj = self._obj_with_exclusions - if self.axis == 1: - obj = obj.T if not len(obj.columns): # e.g. test_margins_no_values_no_cols @@ -1408,15 +1391,13 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: obj = self._obj_with_exclusions result: dict[Hashable, NDFrame | np.ndarray] = {} - for name, grp_df in self._grouper.get_iterator(obj, self.axis): + for name, grp_df in self._grouper.get_iterator(obj): fres = func(grp_df, *args, **kwargs) result[name] = fres result_index = self._grouper.result_index - other_ax = obj.axes[1 - self.axis] - out = self.obj._constructor(result, index=other_ax, columns=result_index) - if self.axis == 0: - out = out.T + out = self.obj._constructor(result, index=obj.columns, columns=result_index) + out = out.T return out @@ -1516,18 +1497,13 @@ def _wrap_applied_output_series( # vstack+constructor is faster than concat and handles MI-columns stacked_values = np.vstack([np.asarray(v) for v in values]) - if self.axis == 0: - index = key_index - columns = first_not_none.index.copy() - if columns.name is None: - # GH6124 - propagate name of Series when it's consistent - names = {v.name for v in values} - if len(names) == 1: - columns.name = next(iter(names)) - else: - index = first_not_none.index - columns = key_index - stacked_values = stacked_values.T + index = key_index + columns = first_not_none.index.copy() + if columns.name is None: + # GH6124 - propagate name of Series when it's consistent + names = {v.name for v in values} + if len(names) == 1: + columns.name = next(iter(names)) if stacked_values.dtype == object: # We'll have the DataFrame constructor do inference @@ -1543,16 +1519,11 @@ def _cython_transform( self, how: str, numeric_only: bool = False, - axis: AxisInt = 0, **kwargs, ) -> DataFrame: - assert axis == 0 # handled by caller - - # With self.axis == 0, we have multi-block tests + # We have multi-block tests # e.g. test_rank_min_int, test_cython_transform_frame # test_transform_numeric_ret - # With self.axis == 1, _get_data_to_aggregate does a transpose - # so we always have a single block. mgr: Manager2D = self._get_data_to_aggregate( numeric_only=numeric_only, name=how ) @@ -1568,7 +1539,6 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr.set_axis(1, mgr.axes[1]) res_df = self.obj._constructor_from_mgr(res_mgr, axes=res_mgr.axes) - res_df = self._maybe_transpose_result(res_df) return res_df def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): @@ -1580,7 +1550,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): applied = [] obj = self._obj_with_exclusions - gen = self._grouper.get_iterator(obj, axis=self.axis) + gen = self._grouper.get_iterator(obj) fast_path, slow_path = self._define_paths(func, *args, **kwargs) # Determine whether to use slow or fast path by evaluating on the first group. @@ -1614,10 +1584,9 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): res = _wrap_transform_general_frame(self.obj, group, res) applied.append(res) - concat_index = obj.columns if self.axis == 0 else obj.index - other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 - concatenated = concat(applied, axis=self.axis, verify_integrity=False) - concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) + concat_index = obj.columns + concatenated = concat(applied, axis=0, verify_integrity=False) + concatenated = concatenated.reindex(concat_index, axis=1, copy=False) return self._set_result_index_ordered(concatenated) __examples_dataframe_doc = dedent( @@ -1685,12 +1654,12 @@ def _define_paths(self, func, *args, **kwargs): if isinstance(func, str): fast_path = lambda group: getattr(group, func)(*args, **kwargs) slow_path = lambda group: group.apply( - lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis + lambda x: getattr(x, func)(*args, **kwargs), axis=0 ) else: fast_path = lambda group: func(group, *args, **kwargs) slow_path = lambda group: group.apply( - lambda x: func(x, *args, **kwargs), axis=self.axis + lambda x: func(x, *args, **kwargs), axis=0 ) return fast_path, slow_path @@ -1774,7 +1743,7 @@ def filter(self, func, dropna: bool = True, *args, **kwargs): indices = [] obj = self._selected_obj - gen = self._grouper.get_iterator(obj, axis=self.axis) + gen = self._grouper.get_iterator(obj) for name, group in gen: # 2023-02-27 no tests are broken this pinning, but it is documented in the @@ -1802,9 +1771,6 @@ def filter(self, func, dropna: bool = True, *args, **kwargs): return self._apply_filter(indices, dropna) def __getitem__(self, key) -> DataFrameGroupBy | SeriesGroupBy: - if self.axis == 1: - # GH 37725 - raise ValueError("Cannot subset columns when using axis=1") # per GH 23566 if isinstance(key, tuple) and len(key) > 1: # if len == 1, then it becomes a SeriesGroupBy and this is actually @@ -1834,7 +1800,6 @@ def _gotitem(self, key, ndim: int, subset=None): return DataFrameGroupBy( subset, self.keys, - axis=self.axis, level=self.level, grouper=self._grouper, exclusions=self.exclusions, @@ -1868,11 +1833,7 @@ def _get_data_to_aggregate( self, *, numeric_only: bool = False, name: str | None = None ) -> Manager2D: obj = self._obj_with_exclusions - if self.axis == 1: - mgr = obj.T._mgr - else: - mgr = obj._mgr - + mgr = obj._mgr if numeric_only: mgr = mgr.get_numeric_data() return mgr @@ -1952,13 +1913,6 @@ def nunique(self, dropna: bool = True) -> DataFrame: 4 ham 5 x 5 ham 5 y """ - - if self.axis != 0: - # see test_groupby_crash_on_nunique - return self._python_apply_general( - lambda sgb: sgb.nunique(dropna), self._obj_with_exclusions, is_agg=True - ) - return self._apply_to_column_groupbys(lambda sgb: sgb.nunique(dropna)) def idxmax( @@ -2253,7 +2207,7 @@ def take( 0 rabbit mammal 15.0 >>> gb = df.groupby([1, 1, 2, 2, 2]) - Take elements at positions 0 and 1 along the axis 0 (default). + Take elements at rows 0 and 1. Note how the indices selected in the result do not correspond to our input indices 0 and 1. That's because we are selecting the 0th @@ -2286,7 +2240,7 @@ def take( 2 0 rabbit mammal 15.0 1 monkey mammal NaN """ - result = self._op_via_apply("take", indices=indices, axis=0, **kwargs) + result = self._op_via_apply("take", indices=indices, **kwargs) return result def skew( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1f0e0567446c6..8e6f7a166375c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -19,7 +19,6 @@ class providing the base-class of operations. partial, wraps, ) -import inspect from textwrap import dedent from typing import ( TYPE_CHECKING, @@ -47,8 +46,6 @@ class providing the base-class of operations. from pandas._typing import ( AnyArrayLike, ArrayLike, - Axis, - AxisInt, DtypeObj, FillnaOptions, IndexLabel, @@ -533,8 +530,7 @@ class providing the base-class of operations. -------- %(klass)s.groupby.apply : Apply function ``func`` group-wise and combine the results together. -%(klass)s.groupby.aggregate : Aggregate using one or more - operations over the specified axis. +%(klass)s.groupby.aggregate : Aggregate using one or more operations. %(klass)s.transform : Call ``func`` on self producing a %(klass)s with the same axis shape as self. @@ -576,7 +572,7 @@ class providing the base-class of operations. %(example)s""" _agg_template_series = """ -Aggregate using one or more operations over the specified axis. +Aggregate using one or more operations. Parameters ---------- @@ -638,8 +634,7 @@ class providing the base-class of operations. and combine the results together. {klass}.groupby.transform : Transforms the Series on each group based on the given function. -{klass}.aggregate : Aggregate using one or more - operations over the specified axis. +{klass}.aggregate : Aggregate using one or more operations. Notes ----- @@ -658,7 +653,7 @@ class providing the base-class of operations. {examples}""" _agg_template_frame = """ -Aggregate using one or more operations over the specified axis. +Aggregate using one or more operations. Parameters ---------- @@ -671,7 +666,7 @@ class providing the base-class of operations. - function - string function name - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. + - dict of index labels -> functions, function names or list of such. - None, in which case ``**kwargs`` are used with Named Aggregation. Here the output has one column for each element in ``**kwargs``. The name of the column is keyword, whereas the value determines the aggregation used to compute @@ -717,8 +712,7 @@ class providing the base-class of operations. and combine the results together. {klass}.groupby.transform : Transforms the Series on each group based on the given function. -{klass}.aggregate : Aggregate using one or more - operations over the specified axis. +{klass}.aggregate : Aggregate using one or more operations. Notes ----- @@ -775,7 +769,6 @@ def f(self): class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): _hidden_attrs = PandasObject._hidden_attrs | { "as_index", - "axis", "dropna", "exclusions", "grouper", @@ -787,7 +780,6 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): "sort", } - axis: AxisInt _grouper: ops.BaseGrouper keys: _KeysArgType | None = None level: IndexLabel | None = None @@ -1210,7 +1202,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: """ keys = self.keys level = self.level - result = self._grouper.get_iterator(self._selected_obj, axis=self.axis) + result = self._grouper.get_iterator(self._selected_obj) # error: Argument 1 to "len" has incompatible type "Hashable"; expected "Sized" if is_list_like(level) and len(level) == 1: # type: ignore[arg-type] # GH 51583 @@ -1247,7 +1239,6 @@ class GroupBy(BaseGroupBy[NDFrameT]): Parameters ---------- obj : pandas object - axis : int, default 0 level : int, default None Level of MultiIndex groupings : list of Grouping objects @@ -1276,7 +1267,7 @@ class GroupBy(BaseGroupBy[NDFrameT]): :: - grouped = obj.groupby(keys, axis=axis) + grouped = obj.groupby(keys) for key, group in grouped: # do something with the data @@ -1308,7 +1299,6 @@ def __init__( self, obj: NDFrameT, keys: _KeysArgType | None = None, - axis: Axis = 0, level: IndexLabel | None = None, grouper: ops.BaseGrouper | None = None, exclusions: frozenset[Hashable] | None = None, @@ -1324,11 +1314,6 @@ def __init__( assert isinstance(obj, NDFrame), type(obj) self.level = level - - if not as_index: - if axis != 0: - raise ValueError("as_index=False only valid for axis=0") - self.as_index = as_index self.keys = keys self.sort = sort @@ -1339,7 +1324,6 @@ def __init__( grouper, exclusions, obj = get_grouper( obj, keys, - axis=axis, level=level, sort=sort, observed=False if observed is lib.no_default else observed, @@ -1360,7 +1344,6 @@ def __init__( self.observed = observed self.obj = obj - self.axis = obj._get_axis_number(axis) self._grouper = grouper self.exclusions = frozenset(exclusions) if exclusions else frozenset() @@ -1374,35 +1357,10 @@ def __getattr__(self, attr: str): f"'{type(self).__name__}' object has no attribute '{attr}'" ) - @final - def _deprecate_axis(self, axis: int, name: str) -> None: - if axis == 1: - warnings.warn( - f"{type(self).__name__}.{name} with axis=1 is deprecated and " - "will be removed in a future version. Operate on the un-grouped " - "DataFrame instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is deprecated " - "and will be removed in a future version. " - "Call without passing 'axis' instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - @final def _op_via_apply(self, name: str, *args, **kwargs): """Compute the result of an operation by using GroupBy's apply.""" f = getattr(type(self._obj_with_exclusions), name) - sig = inspect.signature(f) - - # a little trickery for aggregation functions that need an axis - # argument - if "axis" in sig.parameters: - kwargs["axis"] = self.axis def curried(x): return f(x, *args, **kwargs) @@ -1451,7 +1409,7 @@ def _concat_objects( result = concat( values, - axis=self.axis, + axis=0, keys=group_keys, levels=group_levels, names=group_names, @@ -1461,12 +1419,12 @@ def _concat_objects( # GH5610, returns a MI, with the first level being a # range index keys = list(range(len(values))) - result = concat(values, axis=self.axis, keys=keys) + result = concat(values, axis=0, keys=keys) elif not not_indexed_same: - result = concat(values, axis=self.axis) + result = concat(values, axis=0) - ax = self._selected_obj._get_axis(self.axis) + ax = self._selected_obj.index if self.dropna: labels = self._grouper.group_info[0] mask = labels != -1 @@ -1478,16 +1436,16 @@ def _concat_objects( # so we resort to this # GH 14776, 30667 # TODO: can we reuse e.g. _reindex_non_unique? - if ax.has_duplicates and not result.axes[self.axis].equals(ax): + if ax.has_duplicates and not result.axes[0].equals(ax): # e.g. test_category_order_transformer target = algorithms.unique1d(ax._values) indexer, _ = result.index.get_indexer_non_unique(target) - result = result.take(indexer, axis=self.axis) + result = result.take(indexer, axis=0) else: - result = result.reindex(ax, axis=self.axis, copy=False) + result = result.reindex(ax, axis=0, copy=False) else: - result = concat(values, axis=self.axis) + result = concat(values, axis=0) if self.obj.ndim == 1: name = self.obj.name @@ -1508,22 +1466,22 @@ def _set_result_index_ordered( # set the result index on the passed values object and # return the new object, xref 8046 - obj_axis = self.obj._get_axis(self.axis) + index = self.obj.index if self._grouper.is_monotonic and not self._grouper.has_dropped_na: # shortcut if we have an already ordered grouper - result = result.set_axis(obj_axis, axis=self.axis, copy=False) + result = result.set_axis(index, axis=0, copy=False) return result # row order is scrambled => sort the rows by position in original index original_positions = Index(self._grouper.result_ilocs()) - result = result.set_axis(original_positions, axis=self.axis, copy=False) - result = result.sort_index(axis=self.axis) + result = result.set_axis(original_positions, axis=0, copy=False) + result = result.sort_index(axis=0) if self._grouper.has_dropped_na: # Add back in any missing rows due to dropna - index here is integral # with values referring to the row of the input so can use RangeIndex - result = result.reindex(RangeIndex(len(obj_axis)), axis=self.axis) - result = result.set_axis(obj_axis, axis=self.axis, copy=False) + result = result.reindex(RangeIndex(len(index)), axis=0) + result = result.set_axis(index, axis=0, copy=False) return result @@ -1559,17 +1517,6 @@ def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame: return result - @final - def _maybe_transpose_result(self, result: NDFrameT) -> NDFrameT: - if self.axis == 1: - # Only relevant for DataFrameGroupBy, no-op for SeriesGroupBy - result = result.T - if result.index.equals(self.obj.index): - # Retain e.g. DatetimeIndex/TimedeltaIndex freq - # e.g. test_groupby_crash_on_nunique - result.index = self.obj.index.copy() - return result - @final def _wrap_aggregated_output( self, @@ -1607,10 +1554,7 @@ def _wrap_aggregated_output( result.index = index - # error: Argument 1 to "_maybe_transpose_result" of "GroupBy" has - # incompatible type "Union[Series, DataFrame]"; expected "NDFrameT" - res = self._maybe_transpose_result(result) # type: ignore[arg-type] - return self._reindex_output(res, qs=qs) + return self._reindex_output(result, qs=qs) def _wrap_applied_output( self, @@ -1630,7 +1574,7 @@ def _numba_prep(self, data: DataFrame): sorted_index = self._grouper._sort_idx sorted_ids = self._grouper._sorted_ids - sorted_data = data.take(sorted_index, axis=self.axis).to_numpy() + sorted_data = data.take(sorted_index, axis=0).to_numpy() # GH 46867 index_data = data.index if isinstance(index_data, MultiIndex): @@ -1666,8 +1610,6 @@ def _numba_agg_general( raise NotImplementedError( "as_index=False is not supported. Use .reset_index() instead." ) - if self.axis == 1: - raise NotImplementedError("axis=1 is not supported.") data = self._obj_with_exclusions df = data if data.ndim == 2 else data.to_frame() @@ -1877,7 +1819,7 @@ def _python_apply_general( Series or DataFrame data after applying f """ - values, mutated = self._grouper.apply_groupwise(f, data, self.axis) + values, mutated = self._grouper.apply_groupwise(f, data, axis=0) if not_indexed_same is None: not_indexed_same = mutated @@ -1995,13 +1937,9 @@ def array_func(values: ArrayLike) -> ArrayLike: if how in ["idxmin", "idxmax"]: res = self._wrap_idxmax_idxmin(res) out = self._wrap_aggregated_output(res) - if self.axis == 1: - out = out.infer_objects(copy=False) return out - def _cython_transform( - self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs - ): + def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs): raise AbstractMethodError(self) @final @@ -2055,7 +1993,7 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT: # for each col, reshape to size of original frame by take operation ids, _, _ = self._grouper.group_info - result = result.reindex(self._grouper.result_index, axis=self.axis, copy=False) + result = result.reindex(self._grouper.result_index, axis=0, copy=False) if self.obj.ndim == 1: # i.e. SeriesGroupBy @@ -2063,15 +2001,14 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT: output = obj._constructor(out, index=obj.index, name=obj.name) else: # `.size()` gives Series output on DataFrame input, need axis 0 - axis = 0 if result.ndim == 1 else self.axis # GH#46209 # Don't convert indices: negative indices need to give rise # to null values in the result - new_ax = result.axes[axis].take(ids) + new_ax = result.index.take(ids) output = result._reindex_with_indexers( - {axis: (new_ax, ids)}, allow_dups=True, copy=False + {0: (new_ax, ids)}, allow_dups=True, copy=False ) - output = output.set_axis(obj._get_axis(self.axis), axis=axis) + output = output.set_axis(obj.index, axis=0) return output # ----------------------------------------------------------------- @@ -2084,7 +2021,7 @@ def _apply_filter(self, indices, dropna): else: indices = np.sort(np.concatenate(indices)) if dropna: - filtered = self._selected_obj.take(indices, axis=self.axis) + filtered = self._selected_obj.take(indices, axis=0) else: mask = np.empty(len(self._selected_obj.index), dtype=bool) mask.fill(False) @@ -2762,10 +2699,6 @@ def _value_counts( SeriesGroupBy additionally supports a bins argument. See the docstring of DataFrameGroupBy.value_counts for a description of arguments. """ - if self.axis == 1: - raise NotImplementedError( - "DataFrameGroupBy.value_counts only handles axis=0" - ) name = "proportion" if normalize else "count" df = self.obj @@ -2808,7 +2741,6 @@ def _value_counts( grouper, _, _ = get_grouper( df, key=key, - axis=self.axis, sort=self.sort, observed=False, dropna=dropna, @@ -3386,7 +3318,7 @@ def first( 3 6.0 3 """ - def first_compat(obj: NDFrameT, axis: AxisInt = 0): + def first_compat(obj: NDFrameT): def first(x: Series): """Helper function for first item that isn't NA.""" arr = x.array[notna(x.array)] @@ -3395,7 +3327,7 @@ def first(x: Series): return arr[0] if isinstance(obj, DataFrame): - return obj.apply(first, axis=axis) + return obj.apply(first) elif isinstance(obj, Series): return first(obj) else: # pragma: no cover @@ -3455,7 +3387,7 @@ def last( 3 6.0 3 """ - def last_compat(obj: NDFrameT, axis: AxisInt = 0): + def last_compat(obj: NDFrameT): def last(x: Series): """Helper function for last item that isn't NA.""" arr = x.array[notna(x.array)] @@ -3464,7 +3396,7 @@ def last(x: Series): return arr[-1] if isinstance(obj, DataFrame): - return obj.apply(last, axis=axis) + return obj.apply(last) elif isinstance(obj, Series): return last(obj) else: # pragma: no cover @@ -3595,8 +3527,6 @@ def describe( obj, not_indexed_same=True, ) - if self.axis == 1: - return result.T # GH#49256 - properly handle the grouping column(s) result = result.unstack() @@ -3871,6 +3801,8 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: """ from pandas.core.window import RollingGroupby + assert kwargs.get("axis", 0) == 0 + return RollingGroupby( self._selected_obj, *args, @@ -3992,12 +3924,6 @@ def blk_func(values: ArrayLike) -> ArrayLike: res_mgr = mgr.apply(blk_func) new_obj = self._wrap_agged_manager(res_mgr) - - if self.axis == 1: - # Only relevant for DataFrameGroupBy - new_obj = new_obj.T - new_obj.columns = self.obj.columns - new_obj.index = self.obj.index return new_obj @@ -4299,7 +4225,7 @@ def _nth( # old behaviour, but with all and any support for DataFrames. # modified in GH 7559 to have better perf n = cast(int, n) - dropped = self._selected_obj.dropna(how=dropna, axis=self.axis) + dropped = self._selected_obj.dropna(how=dropna, axis=0) # get a new grouper for our dropped obj grouper: np.ndarray | Index | ops.BaseGrouper @@ -4320,10 +4246,7 @@ def _nth( values = np.where(nulls, NA, grouper) # type: ignore[call-overload] grouper = Index(values, dtype="Int64") - if self.axis == 1: - grb = dropped.T.groupby(grouper, as_index=self.as_index, sort=self.sort) - else: - grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) + grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) return grb.nth(n) @final @@ -4376,12 +4299,8 @@ def quantile( """ mgr = self._get_data_to_aggregate(numeric_only=numeric_only, name="quantile") obj = self._wrap_agged_manager(mgr) - if self.axis == 1: - splitter = self._grouper._get_splitter(obj.T, axis=self.axis) - sdata = splitter._sorted_data.T - else: - splitter = self._grouper._get_splitter(obj, axis=self.axis) - sdata = splitter._sorted_data + splitter = self._grouper._get_splitter(obj, axis=0) + sdata = splitter._sorted_data starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups) @@ -4619,7 +4538,7 @@ def ngroup(self, ascending: bool = True): dtype: int64 """ obj = self._obj_with_exclusions - index = obj._get_axis(self.axis) + index = obj.index comp_ids = self._grouper.group_info[0] dtype: type @@ -4693,7 +4612,7 @@ def cumcount(self, ascending: bool = True): 5 0 dtype: int64 """ - index = self._obj_with_exclusions._get_axis(self.axis) + index = self._obj_with_exclusions.index cumcounts = self._cumcount_array(ascending=ascending) return self._obj_1d_constructor(cumcounts, index) @@ -4780,7 +4699,6 @@ def rank( return self._cython_transform( "rank", numeric_only=False, - axis=0, **kwargs, ) @@ -5127,7 +5045,7 @@ def shift( obj = self._obj_with_exclusions shifted = obj._reindex_with_indexers( - {self.axis: (obj.axes[self.axis], res_indexer)}, + {0: (obj.index, res_indexer)}, fill_value=fill_value, allow_dups=True, ) @@ -5332,13 +5250,8 @@ def pct_change( fill_method = "ffill" limit = 0 filled = getattr(self, fill_method)(limit=limit) - if self.axis == 0: - fill_grp = filled.groupby(self._grouper.codes, group_keys=self.group_keys) - else: - fill_grp = filled.T.groupby(self._grouper.codes, group_keys=self.group_keys) + fill_grp = filled.groupby(self._grouper.codes, group_keys=self.group_keys) shifted = fill_grp.shift(periods=periods, freq=freq) - if self.axis == 1: - shifted = shifted.T return (filled / shifted) - 1 @final @@ -5425,7 +5338,7 @@ def tail(self, n: int = 5) -> NDFrameT: @final def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT: """ - Return _selected_obj with mask applied to the correct axis. + Return _selected_obj with mask applied. Parameters ---------- @@ -5439,11 +5352,7 @@ def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT: """ ids = self._grouper.group_info[0] mask = mask & (ids != -1) - - if self.axis == 0: - return self._selected_obj[mask] - else: - return self._selected_obj.iloc[:, mask] + return self._selected_obj[mask] @final def _reindex_output( @@ -5506,12 +5415,7 @@ def _reindex_output( if self.as_index: # Always holds for SeriesGroupBy unless GH#36507 is implemented - d = { - self.obj._get_axis_name(self.axis): index, - "copy": False, - "fill_value": fill_value, - } - return output.reindex(**d) # type: ignore[arg-type] + return output.reindex(index=index, copy=False, fill_value=fill_value) # GH 13204 # Here, the categorical in-axis groupers, which need to be fully @@ -5642,13 +5546,11 @@ def sample( return self._selected_obj size = sample.process_sampling_size(n, frac, replace) if weights is not None: - weights_arr = sample.preprocess_weights( - self._selected_obj, weights, axis=self.axis - ) + weights_arr = sample.preprocess_weights(self._selected_obj, weights, axis=0) random_state = com.random_state(random_state) - group_iterator = self._grouper.get_iterator(self._selected_obj, self.axis) + group_iterator = self._grouper.get_iterator(self._selected_obj) sampled_indices = [] for labels, obj in group_iterator: @@ -5670,7 +5572,7 @@ def sample( sampled_indices.append(grp_indices[grp_sample]) sampled_indices = np.concatenate(sampled_indices) - return self._selected_obj.take(sampled_indices, axis=self.axis) + return self._selected_obj.take(sampled_indices, axis=0) def _idxmax_idxmin( self, @@ -5747,7 +5649,7 @@ def _idxmax_idxmin( return result def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT: - index = self.obj._get_axis(self.axis) + index = self.obj.index if res.size == 0: result = res.astype(index.dtype) else: @@ -5778,7 +5680,6 @@ def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT: def get_groupby( obj: NDFrame, by: _KeysArgType | None = None, - axis: AxisInt = 0, grouper: ops.BaseGrouper | None = None, group_keys: bool = True, ) -> GroupBy: @@ -5797,7 +5698,6 @@ def get_groupby( return klass( obj=obj, keys=by, - axis=axis, grouper=grouper, group_keys=group_keys, ) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d36bfa62f4be5..83642b89c99a0 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -17,7 +17,6 @@ warn_copy_on_write, ) -from pandas._libs import lib from pandas._libs.tslibs import OutOfBoundsDatetime from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly @@ -55,7 +54,6 @@ from pandas._typing import ( ArrayLike, - Axis, NDFrameT, npt, ) @@ -68,10 +66,10 @@ class Grouper: A Grouper allows the user to specify a groupby instruction for an object. This specification will select a column via the key parameter, or if the - level and/or axis parameters are given, a level of the index of the target + level parameter is given, a level of the index of the target object. - If `axis` and/or `level` are passed as keywords to both `Grouper` and + If ``level`` is passed as a keyword to both `Grouper` and `groupby`, the values passed to `Grouper` take precedence. Parameters @@ -85,8 +83,6 @@ class Grouper: (via key or level) is a datetime-like object. For full specification of available frequencies, please see `here `_. - axis : str, int, defaults to 0 - Number/name of the axis. sort : bool, default to False Whether to sort the resulting labels. closed : {'left' or 'right'} @@ -249,7 +245,7 @@ class Grouper: _gpr_index: Index | None _grouper: Index | None - _attributes: tuple[str, ...] = ("key", "level", "freq", "axis", "sort", "dropna") + _attributes: tuple[str, ...] = ("key", "level", "freq", "sort", "dropna") def __new__(cls, *args, **kwargs): if kwargs.get("freq") is not None: @@ -263,29 +259,12 @@ def __init__( key=None, level=None, freq=None, - axis: Axis | lib.NoDefault = lib.no_default, sort: bool = False, dropna: bool = True, ) -> None: - if type(self) is Grouper: - # i.e. not TimeGrouper - if axis is not lib.no_default: - warnings.warn( - "Grouper axis keyword is deprecated and will be removed in a " - "future version. To group on axis=1, use obj.T.groupby(...) " - "instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - if axis is lib.no_default: - axis = 0 - self.key = key self.level = level self.freq = freq - self.axis = axis self.sort = sort self.dropna = dropna @@ -315,7 +294,6 @@ def _get_grouper( grouper, _, obj = get_grouper( obj, [self.key], - axis=self.axis, level=self.level, sort=self.sort, validate=validate, @@ -381,7 +359,7 @@ def _set_grouper( ax = Index(obj[key], name=key) else: - ax = obj._get_axis(self.axis) + ax = obj.index if self.level is not None: level = self.level @@ -404,7 +382,7 @@ def _set_grouper( kind="mergesort", na_position="first" ) ax = ax.take(indexer) - obj = obj.take(indexer, axis=self.axis) + obj = obj.take(indexer, axis=0) # error: Incompatible types in assignment (expression has type # "NDFrameT", variable has type "None") @@ -846,7 +824,6 @@ def groups(self) -> dict[Hashable, np.ndarray]: def get_grouper( obj: NDFrameT, key=None, - axis: Axis = 0, level=None, sort: bool = True, observed: bool = False, @@ -862,8 +839,8 @@ def get_grouper( Groupers are ultimately index mappings. They can originate as: index mappings, keys to columns, functions, or Groupers - Groupers enable local references to axis,level,sort, while - the passed in axis, level, and sort are 'global'. + Groupers enable local references to level,sort, while + the passed in level, and sort are 'global'. This routine tries to figure out what the passing in references are and then creates a Grouping for each one, combined into @@ -875,10 +852,10 @@ def get_grouper( If validate, then check for key/level overlaps. """ - group_axis = obj._get_axis(axis) + group_axis = obj.index # validate that the passed single level is compatible with the passed - # axis of the object + # index of the object if level is not None: # TODO: These if-block and else-block are almost same. # MultiIndex instance check is removable, but it seems that there are @@ -911,11 +888,8 @@ def get_grouper( raise ValueError("multiple levels only valid with MultiIndex") if isinstance(level, str): - if obj._get_axis(axis).name != level: - raise ValueError( - f"level name {level} is not the name " - f"of the {obj._get_axis_name(axis)}" - ) + if obj.index.name != level: + raise ValueError(f"level name {level} is not the name of the index") elif level > 0 or level < -1: raise ValueError("level > 0 or level < -1 only valid with MultiIndex") @@ -1031,14 +1005,14 @@ def is_in_obj(gpr) -> bool: elif is_in_axis(gpr): # df.groupby('name') if obj.ndim != 1 and gpr in obj: if validate: - obj._check_label_or_level_ambiguity(gpr, axis=axis) + obj._check_label_or_level_ambiguity(gpr, axis=0) in_axis, name, gpr = True, gpr, obj[gpr] if gpr.ndim != 1: # non-unique columns; raise here to get the name in the # exception message raise ValueError(f"Grouper for '{name}' not 1-dimensional") exclusions.add(name) - elif obj._is_level_reference(gpr, axis=axis): + elif obj._is_level_reference(gpr, axis=0): in_axis, level, gpr = False, gpr, None else: raise KeyError(gpr) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e2ddf9aa5c0c1..0693fb5104b78 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -604,9 +604,7 @@ def __iter__(self) -> Iterator[Hashable]: def nkeys(self) -> int: return len(self.groupings) - def get_iterator( - self, data: NDFrameT, axis: AxisInt = 0 - ) -> Iterator[tuple[Hashable, NDFrameT]]: + def get_iterator(self, data: NDFrameT) -> Iterator[tuple[Hashable, NDFrameT]]: """ Groupby iterator @@ -615,7 +613,7 @@ def get_iterator( Generator yielding sequence of (name, subsetted object) for each group """ - splitter = self._get_splitter(data, axis=axis) + splitter = self._get_splitter(data) keys = self.group_keys_seq yield from zip(keys, splitter) @@ -626,6 +624,7 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: ------- Generator yielding subsetted objects """ + assert axis == 0 ids, _, ngroups = self.group_info return _get_splitter( data, @@ -898,6 +897,7 @@ def _aggregate_series_pure_python( def apply_groupwise( self, f: Callable, data: DataFrame | Series, axis: AxisInt = 0 ) -> tuple[list, bool]: + assert axis == 0 mutated = False splitter = self._get_splitter(data, axis=axis) group_keys = self.group_keys_seq @@ -1033,6 +1033,7 @@ def get_iterator(self, data: NDFrame, axis: AxisInt = 0): Generator yielding sequence of (name, subsetted object) for each group """ + assert axis == 0 if axis == 0: slicer = lambda start, edge: data.iloc[start:edge] else: @@ -1111,6 +1112,7 @@ def groupings(self) -> list[grouper.Grouping]: def _is_indexed_like(obj, axes, axis: AxisInt) -> bool: + assert axis == 0 if isinstance(obj, Series): if len(axes) > 1: return False @@ -1136,6 +1138,7 @@ def __init__( sorted_ids: npt.NDArray[np.intp], axis: AxisInt = 0, ) -> None: + assert axis == 0 self.data = data self.labels = ensure_platform_int(labels) # _should_ already be np.intp self.ngroups = ngroups @@ -1183,6 +1186,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # return sdata.iloc[slice_obj] # else: # return sdata.iloc[:, slice_obj] + assert self.axis == 0 mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) df = sdata._constructor_from_mgr(mgr, axes=mgr.axes) return df.__finalize__(sdata, method="groupby") @@ -1197,6 +1201,7 @@ def _get_splitter( sorted_ids: npt.NDArray[np.intp], axis: AxisInt = 0, ) -> DataSplitter: + assert axis == 0 if isinstance(data, Series): klass: type[DataSplitter] = SeriesSplitter else: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4d6507d89ec90..bf5b7e5906180 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -101,7 +101,6 @@ Any, AnyArrayLike, Axis, - AxisInt, Concatenate, Frequency, IndexLabel, @@ -134,7 +133,6 @@ class Resampler(BaseGroupBy, PandasObject): ---------- obj : Series or DataFrame groupby : TimeGrouper - axis : int, default 0 kind : str or None 'period', 'timestamp' to override default index treatment @@ -156,7 +154,6 @@ class Resampler(BaseGroupBy, PandasObject): # to the groupby descriptor _attributes = [ "freq", - "axis", "closed", "label", "convention", @@ -169,7 +166,6 @@ def __init__( self, obj: NDFrame, timegrouper: TimeGrouper, - axis: Axis = 0, kind=None, *, gpr_index: Index, @@ -180,7 +176,6 @@ def __init__( self._timegrouper = timegrouper self.keys = None self.sort = True - self.axis = obj._get_axis_number(axis) self.kind = kind self.group_keys = group_keys self.as_index = True @@ -449,7 +444,7 @@ def _gotitem(self, key, ndim: int, subset=None): assert subset.ndim == 1 grouped = get_groupby( - subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys + subset, by=None, grouper=grouper, group_keys=self.group_keys ) return grouped @@ -462,9 +457,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): # Excludes `on` column when provided obj = self._obj_with_exclusions - grouped = get_groupby( - obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys - ) + grouped = get_groupby(obj, by=None, grouper=grouper, group_keys=self.group_keys) try: if callable(how): @@ -1801,12 +1794,7 @@ def _downsample(self, how, **kwargs): # we are downsampling # we want to call the actual grouper method here - if self.axis == 0: - result = obj.groupby(self._grouper).aggregate(how, **kwargs) - else: - # test_resample_axis1 - result = obj.T.groupby(self._grouper).aggregate(how, **kwargs).T - + result = obj.groupby(self._grouper).aggregate(how, **kwargs) return self._wrap_result(result) def _adjust_binner_for_upsample(self, binner): @@ -1837,8 +1825,6 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): .fillna: Fill NA/NaN values using the specified method. """ - if self.axis: - raise AssertionError("axis must be 0") if self._from_selection: raise ValueError( "Upsampling from level= or on= selection " @@ -2010,7 +1996,6 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): obj, indexer, new_index, - axis=self.axis, ) return self._wrap_result(new_obj) @@ -2131,7 +2116,6 @@ def __init__( closed: Literal["left", "right"] | None = None, label: Literal["left", "right"] | None = None, how: str = "mean", - axis: Axis = 0, fill_method=None, limit: int | None = None, kind: str | None = None, @@ -2228,7 +2212,7 @@ def __init__( # always sort time groupers kwargs["sort"] = True - super().__init__(freq=freq, key=key, axis=axis, **kwargs) + super().__init__(freq=freq, key=key, **kwargs) def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: """ @@ -2255,7 +2239,6 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: obj, timegrouper=self, kind=kind, - axis=self.axis, group_keys=self.group_keys, gpr_index=ax, ) @@ -2279,7 +2262,6 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: obj, timegrouper=self, kind=kind, - axis=self.axis, group_keys=self.group_keys, gpr_index=ax, ) @@ -2287,7 +2269,6 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: return TimedeltaIndexResampler( obj, timegrouper=self, - axis=self.axis, group_keys=self.group_keys, gpr_index=ax, ) @@ -2559,7 +2540,9 @@ def _set_grouper( def _take_new_index( - obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0 + obj: NDFrameT, + indexer: npt.NDArray[np.intp], + new_index: Index, ) -> NDFrameT: if isinstance(obj, ABCSeries): new_values = algos.take_nd(obj._values, indexer) @@ -2568,8 +2551,6 @@ def _take_new_index( new_values, index=new_index, name=obj.name ) elif isinstance(obj, ABCDataFrame): - if axis == 1: - raise NotImplementedError("axis 1 is not supported") new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) return obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) else: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 22304bbdd1575..e53eea6f7f075 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -264,7 +264,7 @@ def _groupby_and_merge( if all(item in right.columns for item in by): rby = right.groupby(by, sort=False) - for key, lhs in lby._grouper.get_iterator(lby._selected_obj, axis=lby.axis): + for key, lhs in lby._grouper.get_iterator(lby._selected_obj): if rby is None: rhs = right else: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index ff973f6defc09..db28bfb1e9200 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -472,7 +472,7 @@ def _all_key(): margin_keys.append(all_key) else: - margin = data.groupby(level=0, axis=0, observed=observed).apply(aggfunc) + margin = data.groupby(level=0, observed=observed).apply(aggfunc) all_key = _all_key() table[all_key] = margin result = table diff --git a/pandas/core/series.py b/pandas/core/series.py index 19e54954bb41c..01196b7efd498 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2222,7 +2222,6 @@ def _set_name( def groupby( self, by=None, - axis: Axis = 0, level: IndexLabel | None = None, as_index: bool = True, sort: bool = True, @@ -2236,12 +2235,10 @@ def groupby( raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: raise TypeError("as_index=False only valid with DataFrame") - axis = self._get_axis_number(axis) return SeriesGroupBy( obj=self, keys=by, - axis=axis, level=level, as_index=as_index, sort=sort, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 49b380e0af01e..9d693f034d911 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -102,15 +102,6 @@ the values are used as-is to determine the groups. A label or list of labels may be passed to group by the columns in ``self``. Notice that a tuple is interpreted as a (single) key. -axis : {0 or 'index', 1 or 'columns'}, default 0 - Split along rows (0) or columns (1). For `Series` this parameter - is unused and defaults to 0. - - .. deprecated:: 2.1.0 - - Will be removed and behave like axis=0 in a future version. - For ``axis=1``, do ``frame.T.groupby(...)`` instead. - level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular level or levels. Do not specify both ``by`` and ``level``. diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 9ebf32d3e536e..3c07fc156aea1 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -56,7 +56,6 @@ if TYPE_CHECKING: from pandas._typing import ( - Axis, TimedeltaConvertibleTypes, npt, ) @@ -204,13 +203,6 @@ class ExponentialMovingWindow(BaseWindow): [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``. - axis : {0, 1}, default 0 - If ``0`` or ``'index'``, calculate across the rows. - - If ``1`` or ``'columns'``, calculate across the columns. - - For `Series` this parameter is unused and defaults to 0. - times : np.ndarray, Series, default None Only applicable to ``mean()``. @@ -328,7 +320,6 @@ class ExponentialMovingWindow(BaseWindow): "min_periods", "adjust", "ignore_na", - "axis", "times", "method", ] @@ -343,7 +334,6 @@ def __init__( min_periods: int | None = 0, adjust: bool = True, ignore_na: bool = False, - axis: Axis = 0, times: np.ndarray | NDFrame | None = None, method: str = "single", *, @@ -356,7 +346,6 @@ def __init__( center=False, closed=None, method=method, - axis=axis, selection=selection, ) self.com = com @@ -397,9 +386,7 @@ def __init__( "times is not None." ) # Without times, points are equally spaced - self._deltas = np.ones( - max(self.obj.shape[self.axis] - 1, 0), dtype=np.float64 - ) + self._deltas = np.ones(max(self.obj.shape[0] - 1, 0), dtype=np.float64) self._com = get_center_of_mass( # error: Argument 3 to "get_center_of_mass" has incompatible type # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]"; @@ -460,7 +447,6 @@ def online( min_periods=self.min_periods, adjust=self.adjust, ignore_na=self.ignore_na, - axis=self.axis, times=self.times, engine=engine, engine_kwargs=engine_kwargs, @@ -941,7 +927,6 @@ def __init__( min_periods: int | None = 0, adjust: bool = True, ignore_na: bool = False, - axis: Axis = 0, times: np.ndarray | NDFrame | None = None, engine: str = "numba", engine_kwargs: dict[str, bool] | None = None, @@ -961,13 +946,10 @@ def __init__( min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, - axis=axis, times=times, selection=selection, ) - self._mean = EWMMeanState( - self._com, self.adjust, self.ignore_na, self.axis, obj.shape - ) + self._mean = EWMMeanState(self._com, self.adjust, self.ignore_na, obj.shape) if maybe_use_numba(engine): self.engine = engine self.engine_kwargs = engine_kwargs @@ -1055,7 +1037,7 @@ def mean(self, *args, update=None, update_times=None, **kwargs): if update_times is not None: raise NotImplementedError("update_times is not implemented.") update_deltas = np.ones( - max(self._selected_obj.shape[self.axis - 1] - 1, 0), dtype=np.float64 + max(self._selected_obj.shape[-1] - 1, 0), dtype=np.float64 ) if update is not None: if self._mean.last_ewm is None: diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index aac10596ffc69..1bf26c482337c 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -36,7 +36,6 @@ if TYPE_CHECKING: from pandas._typing import ( - Axis, QuantileInterpolation, WindowingRankType, ) @@ -58,13 +57,6 @@ class Expanding(RollingAndExpandingMixin): Minimum number of observations in window required to have a value; otherwise, result is ``np.nan``. - axis : int or str, default 0 - If ``0`` or ``'index'``, roll across the rows. - - If ``1`` or ``'columns'``, roll across the columns. - - For `Series` this parameter is unused and defaults to 0. - method : str {'single', 'table'}, default 'single' Execute the rolling operation per single column or row (``'single'``) or over the entire object (``'table'``). @@ -119,20 +111,18 @@ class Expanding(RollingAndExpandingMixin): 4 7.0 """ - _attributes: list[str] = ["min_periods", "axis", "method"] + _attributes: list[str] = ["min_periods", "method"] def __init__( self, obj: NDFrame, min_periods: int = 1, - axis: Axis = 0, method: str = "single", selection=None, ) -> None: super().__init__( obj=obj, min_periods=min_periods, - axis=axis, method=method, selection=selection, ) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 9357945e78c63..eb06479fc325e 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -186,8 +186,7 @@ def generate_numba_table_func( Generate a numba jitted function to apply window calculations table-wise. Func will be passed a M window size x N number of columns array, and - must return a 1 x N number of columns array. Func is intended to operate - row-wise, but the result will be transposed for axis=1. + must return a 1 x N number of columns array. 1. jit the user's function 2. Return a rolling apply function with the jitted function inline diff --git a/pandas/core/window/online.py b/pandas/core/window/online.py index 29d1f740e021f..72236bf5ccea2 100644 --- a/pandas/core/window/online.py +++ b/pandas/core/window/online.py @@ -87,15 +87,14 @@ def online_ewma( class EWMMeanState: - def __init__(self, com, adjust, ignore_na, axis, shape) -> None: + def __init__(self, com, adjust, ignore_na, shape) -> None: alpha = 1.0 / (1.0 + com) - self.axis = axis self.shape = shape self.adjust = adjust self.ignore_na = ignore_na self.new_wt = 1.0 if adjust else alpha self.old_wt_factor = 1.0 - alpha - self.old_wt = np.ones(self.shape[self.axis - 1]) + self.old_wt = np.ones(self.shape[-1]) self.last_ewm = None def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func): @@ -114,5 +113,5 @@ def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func): return result def reset(self) -> None: - self.old_wt = np.ones(self.shape[self.axis - 1]) + self.old_wt = np.ones(self.shape[-1]) self.last_ewm = None diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 67885fcaec852..dddd0a7ac48c3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -100,7 +100,6 @@ from pandas._typing import ( ArrayLike, - Axis, NDFrameT, QuantileInterpolation, WindowingRankType, @@ -131,7 +130,6 @@ def __init__( min_periods: int | None = None, center: bool | None = False, win_type: str | None = None, - axis: Axis = 0, on: str | Index | None = None, closed: str | None = None, step: int | None = None, @@ -147,15 +145,10 @@ def __init__( self.min_periods = min_periods self.center = center self.win_type = win_type - self.axis = obj._get_axis_number(axis) if axis is not None else None self.method = method self._win_freq_i8: int | None = None if self.on is None: - if self.axis == 0: - self._on = self.obj.index - else: - # i.e. self.axis == 1 - self._on = self.obj.columns + self._on = self.obj.index elif isinstance(self.on, Index): self._on = self.on elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns: @@ -278,14 +271,8 @@ def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT: # filter out the on from the object if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) - if obj.ndim > 1 and (numeric_only or self.axis == 1): - # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything - # to float to calculate the complete row at once. We exclude all non-numeric - # dtypes. + if obj.ndim > 1 and numeric_only: obj = self._make_numeric_only(obj) - if self.axis == 1: - obj = obj.astype("float64", copy=False) - obj._mgr = obj._mgr.consolidate() return obj def _gotitem(self, key, ndim, subset=None): @@ -477,9 +464,6 @@ def _apply_columnwise( obj = notna(obj).astype(int) obj._mgr = obj._mgr.consolidate() - if self.axis == 1: - obj = obj.T - taker = [] res_values = [] for i, arr in enumerate(obj._iter_column_arrays()): @@ -505,9 +489,6 @@ def _apply_columnwise( verify_integrity=False, ) - if self.axis == 1: - df = df.T - return self._resolve_output(df, obj) def _apply_tablewise( @@ -523,9 +504,7 @@ def _apply_tablewise( raise ValueError("method='table' not applicable for Series objects.") obj = self._create_data(self._selected_obj, numeric_only) values = self._prep_values(obj.to_numpy()) - values = values.T if self.axis == 1 else values result = homogeneous_func(values) - result = result.T if self.axis == 1 else result index = self._slice_axis_for_step(obj.index, result) columns = ( obj.columns @@ -633,8 +612,6 @@ def _numba_apply( else window_indexer.window_size ) obj = self._create_data(self._selected_obj) - if self.axis == 1: - obj = obj.T values = self._prep_values(obj.to_numpy()) if values.ndim == 1: values = values.reshape(-1, 1) @@ -660,7 +637,6 @@ def _numba_apply( result = aggregator( values.T, start=start, end=end, min_periods=min_periods, **func_kwargs ).T - result = result.T if self.axis == 1 else result index = self._slice_axis_for_step(obj.index, result) if obj.ndim == 1: result = result.squeeze() @@ -935,18 +911,6 @@ class Window(BaseWindow): Provided integer column is ignored and excluded from result since an integer index is not used to calculate the rolling window. - axis : int or str, default 0 - If ``0`` or ``'index'``, roll across the rows. - - If ``1`` or ``'columns'``, roll across the columns. - - For `Series` this parameter is unused and defaults to 0. - - .. deprecated:: 2.1.0 - - The axis keyword is deprecated. For ``axis=1``, - transpose the DataFrame first instead. - closed : str, default None If ``'right'``, the first point in the window is excluded from calculations. @@ -1138,7 +1102,6 @@ class Window(BaseWindow): "min_periods", "center", "win_type", - "axis", "on", "closed", "step", @@ -1858,7 +1821,6 @@ class Rolling(RollingAndExpandingMixin): "min_periods", "center", "win_type", - "axis", "on", "closed", "step", @@ -1926,10 +1888,7 @@ def _validate_datetimelike_monotonic(self) -> None: def _raise_monotonic_error(self, msg: str): on = self.on if on is None: - if self.axis == 0: - on = "index" - else: - on = "column" + on = "index" raise ValueError(f"{on} {msg}") @doc( diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 084452ec23719..b41e03d87b275 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -544,12 +544,7 @@ def boxplot_frame_groupby( maybe_adjust_figure(fig, bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: keys, frames = zip(*grouped) - if grouped.axis == 0: - df = pd.concat(frames, keys=keys, axis=1) - elif len(frames) > 1: - df = frames[0].join(frames[1::]) - else: - df = frames[0] + df = pd.concat(frames, keys=keys, axis=1) # GH 16748, DataFrameGroupby fails when subplots=False and `column` argument # is assigned, and in this case, since `df` here becomes MI after groupby, diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index e9967b75becce..e9192dae66a46 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -270,7 +270,7 @@ def test_transform_groupby_kernel_series(request, string_series, op): @pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel_frame(request, axis, float_frame, op): +def test_transform_groupby_kernel_frame(request, float_frame, op): if op == "ngroup": request.applymarker( pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") @@ -279,22 +279,15 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op): # GH 35964 args = [0.0] if op == "fillna" else [] - if axis in (0, "index"): - ones = np.ones(float_frame.shape[0]) - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - else: - ones = np.ones(float_frame.shape[1]) - msg = "DataFrame.groupby with axis=1 is deprecated" - - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = float_frame.groupby(ones, axis=axis) + ones = np.ones(float_frame.shape[0]) + gb = float_frame.groupby(ones) warn = FutureWarning if op == "fillna" else None op_msg = "DataFrameGroupBy.fillna is deprecated" with tm.assert_produces_warning(warn, match=op_msg): expected = gb.transform(op, *args) - result = float_frame.transform(op, axis, *args) + result = float_frame.transform(op, 0, *args) tm.assert_frame_equal(result, expected) # same thing, but ensuring we have multiple blocks @@ -302,17 +295,10 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op): float_frame["E"] = float_frame["A"].copy() assert len(float_frame._mgr.arrays) > 1 - if axis in (0, "index"): - ones = np.ones(float_frame.shape[0]) - else: - ones = np.ones(float_frame.shape[1]) - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = float_frame.groupby(ones, axis=axis) - warn = FutureWarning if op == "fillna" else None - op_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=op_msg): - expected2 = gb2.transform(op, *args) - result2 = float_frame.transform(op, axis, *args) + ones = np.ones(float_frame.shape[0]) + gb2 = float_frame.groupby(ones) + expected2 = gb2.transform(op, *args) + result2 = float_frame.transform(op, 0, *args) tm.assert_frame_equal(result2, expected2) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 0e86f95a93091..c6962815ffda1 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -111,28 +111,6 @@ def test_groupby_aggregation_mixed_dtype(): tm.assert_frame_equal(result, expected) -def test_groupby_aggregation_multi_level_column(): - # GH 29772 - lst = [ - [True, True, True, False], - [True, False, np.nan, False], - [True, True, np.nan, False], - [True, True, np.nan, False], - ] - df = DataFrame( - data=lst, - columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), - ) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=1, axis=1) - result = gb.sum(numeric_only=False) - expected = DataFrame({0: [2.0, True, True, True], 1: [1, 0, 1, 1]}) - - tm.assert_frame_equal(result, expected) - - def test_agg_apply_corner(ts, tsframe): # nothing to group, all NA grouped = ts.groupby(ts * np.nan, group_keys=False) @@ -268,65 +246,6 @@ def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func): gb.agg(reduction_func, axis=1) -@pytest.mark.parametrize( - "func, expected, dtype, result_dtype_dict", - [ - ("sum", [5, 7, 9], "int64", {}), - ("std", [4.5**0.5] * 3, int, {"i": float, "j": float, "k": float}), - ("var", [4.5] * 3, int, {"i": float, "j": float, "k": float}), - ("sum", [5, 7, 9], "Int64", {"j": "int64"}), - ("std", [4.5**0.5] * 3, "Int64", {"i": float, "j": float, "k": float}), - ("var", [4.5] * 3, "Int64", {"i": "float64", "j": "float64", "k": "float64"}), - ], -) -def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype_dict): - # GH#43209 - df = DataFrame( - [[1, 2, 3, 4, 5, 6]] * 3, - columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]), - ).astype({("a", "j"): dtype, ("b", "j"): dtype}) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=1, axis=1) - result = gb.agg(func) - expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype( - result_dtype_dict - ) - - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "func, expected_data, result_dtype_dict", - [ - ("sum", [[2, 4], [10, 12], [18, 20]], {10: "int64", 20: "int64"}), - # std should ideally return Int64 / Float64 #43330 - ("std", [[2**0.5] * 2] * 3, "float64"), - ("var", [[2] * 2] * 3, {10: "float64", 20: "float64"}), - ], -) -def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict): - # GH#43209 - df = DataFrame( - np.arange(12).reshape(3, 4), - index=Index([0, 1, 0], name="y"), - columns=Index([10, 20, 10, 20], name="x"), - dtype="int64", - ).astype({10: "Int64"}) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby("x", axis=1) - result = gb.agg(func) - expected = DataFrame( - data=expected_data, - index=Index([0, 1, 0], name="y"), - columns=Index([10, 20], name="x"), - ).astype(result_dtype_dict) - tm.assert_frame_equal(result, expected) - - def test_aggregate_item_by_item(df): grouped = df.groupby("A") @@ -1616,19 +1535,6 @@ def test_groupby_complex_raises(func): data.groupby(data.index % 2).agg(func) -@pytest.mark.parametrize( - "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}] -) -def test_multi_axis_1_raises(func): - # GH#46995 - df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]}) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby("a", axis=1) - with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"): - gb.agg(func) - - @pytest.mark.parametrize( "test, constant", [ diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 5c99882cef6d2..b1f8ecc9c8a39 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -126,21 +126,6 @@ def test_cython_agg_nothing_to_agg_with_dates(): frame.groupby("b").dates.mean(numeric_only=True) -def test_cython_agg_frame_columns(): - # #2113 - df = DataFrame({"x": [1, 2, 3], "y": [3, 4, 5]}) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - - def test_cython_agg_return_dict(): # GH 16741 df = DataFrame( diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py index e73fb15a54181..274e23abd77d9 100644 --- a/pandas/tests/groupby/methods/test_describe.py +++ b/pandas/tests/groupby/methods/test_describe.py @@ -87,18 +87,6 @@ def test_frame_describe_multikey(tsframe): expected = pd.concat(desc_groups, axis=1) tm.assert_frame_equal(result, expected) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) - result = groupedT.describe() - expected = tsframe.describe().T - # reverting the change from https://github.com/pandas-dev/pandas/pull/35441/ - expected.index = MultiIndex( - levels=[[0, 1], expected.index], - codes=[[0, 0, 1, 1], range(len(expected.index))], - ) - tm.assert_frame_equal(result, expected) - def test_frame_describe_tupleindex(): # GH 14848 - regression from 0.19.0 to 0.19.1 diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index 52d63cb720485..1b852abad6c8e 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -540,32 +540,6 @@ def test_groupby_head_tail(op, n, expected_rows, columns, as_index): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "op, n, expected_cols", - [ - ("head", -1, [0]), - ("head", 0, []), - ("head", 1, [0, 2]), - ("head", 7, [0, 1, 2]), - ("tail", -1, [1]), - ("tail", 0, []), - ("tail", 1, [1, 2]), - ("tail", 7, [0, 1, 2]), - ], -) -def test_groupby_head_tail_axis_1(op, n, expected_cols): - # GH 9772 - df = DataFrame( - [[1, 2, 3], [1, 4, 5], [2, 6, 7], [3, 8, 9]], columns=["A", "B", "C"] - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - g = df.groupby([0, 0, 1], axis=1) - expected = df.iloc[:, expected_cols] - result = getattr(g, op)(n) - tm.assert_frame_equal(result, expected) - - def test_group_selection_cache(): # GH 12839 nth, head, and tail should return same result consistently df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) @@ -773,24 +747,6 @@ def test_np_ints(slice_test_df, slice_test_grouped): tm.assert_frame_equal(result, expected) -def test_groupby_nth_with_column_axis(): - # GH43926 - df = DataFrame( - [ - [4, 5, 6], - [8, 8, 7], - ], - index=["z", "y"], - columns=["C", "B", "A"], - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(df.iloc[1], axis=1) - result = gb.nth(0) - expected = df.iloc[:, [0, 2]] - tm.assert_frame_equal(result, expected) - - def test_groupby_nth_interval(): # GH#24205 idx_result = MultiIndex( @@ -814,35 +770,6 @@ def test_groupby_nth_interval(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "start, stop, expected_values, expected_columns", - [ - (None, None, [0, 1, 2, 3, 4], list("ABCDE")), - (None, 1, [0, 3], list("AD")), - (None, 9, [0, 1, 2, 3, 4], list("ABCDE")), - (None, -1, [0, 1, 3], list("ABD")), - (1, None, [1, 2, 4], list("BCE")), - (1, -1, [1], list("B")), - (-1, None, [2, 4], list("CE")), - (-1, 2, [4], list("E")), - ], -) -@pytest.mark.parametrize("method", ["call", "index"]) -def test_nth_slices_with_column_axis( - start, stop, expected_values, expected_columns, method -): - df = DataFrame([range(5)], columns=[list("ABCDE")]) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([5, 5, 5, 6, 6], axis=1) - result = { - "call": lambda start, stop: gb.nth(slice(start, stop)), - "index": lambda start, stop: gb.nth[start:stop], - }[method](start, stop) - expected = DataFrame([expected_values], columns=[expected_columns]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.filterwarnings( "ignore:invalid value encountered in remainder:RuntimeWarning" ) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index 1d2e639314cba..af0deba138469 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -377,32 +377,6 @@ def test_groupby_timedelta_quantile(): tm.assert_frame_equal(result, expected) -def test_columns_groupby_quantile(): - # GH 33795 - df = DataFrame( - np.arange(12).reshape(3, -1), - index=list("XYZ"), - columns=pd.Series(list("ABAB"), name="col"), - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby("col", axis=1) - result = gb.quantile(q=[0.8, 0.2]) - expected = DataFrame( - [ - [1.6, 0.4, 2.6, 1.4], - [5.6, 4.4, 6.6, 5.4], - [9.6, 8.4, 10.6, 9.4], - ], - index=list("XYZ"), - columns=pd.MultiIndex.from_tuples( - [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None] - ), - ) - - tm.assert_frame_equal(result, expected) - - def test_timestamp_groupby_quantile(unit): # GH 33168 dti = pd.date_range( diff --git a/pandas/tests/groupby/methods/test_size.py b/pandas/tests/groupby/methods/test_size.py index fd55ceedd1083..5a3eb49e97fb7 100644 --- a/pandas/tests/groupby/methods/test_size.py +++ b/pandas/tests/groupby/methods/test_size.py @@ -3,8 +3,6 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.common import is_integer_dtype - from pandas import ( DataFrame, Index, @@ -22,35 +20,6 @@ def test_size(df, by): assert result[key] == len(group) -@pytest.mark.parametrize( - "by", - [ - [0, 0, 0, 0], - [0, 1, 1, 1], - [1, 0, 1, 1], - [0, None, None, None], - pytest.param([None, None, None, None], marks=pytest.mark.xfail), - ], -) -@pytest.mark.parametrize("axis_1", [1, "columns"]) -def test_size_axis_1(df, axis_1, by, sort, dropna): - # GH#45715 - counts = {key: sum(value == key for value in by) for key in dict.fromkeys(by)} - if dropna: - counts = {key: value for key, value in counts.items() if key is not None} - expected = Series(counts, dtype="int64") - if sort: - expected = expected.sort_index() - if is_integer_dtype(expected.index.dtype) and not any(x is None for x in by): - expected.index = expected.index.astype(int) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = df.groupby(by=by, axis=axis_1, sort=sort, dropna=dropna) - result = grouped.size() - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) def test_size_sort(sort, by): df = DataFrame(np.random.default_rng(2).choice(20, (1000, 3)), columns=list("ABC")) diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 42f949443e33f..4d610018917f6 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -232,14 +232,6 @@ def education_df(): ) -def test_axis(education_df): - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gp = education_df.groupby("country", axis=1) - with pytest.raises(NotImplementedError, match="axis"): - gp.value_counts() - - def test_bad_subset(education_df): gp = education_df.groupby("country") with pytest.raises(ValueError, match="subset"): diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 29070e686d91a..26b31e202e6e6 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -122,40 +122,6 @@ def test_apply_index_date_object(using_infer_string): tm.assert_series_equal(result, expected) -def test_apply_trivial(using_infer_string): - # GH 20066 - # trivial apply: ignore input and return a constant dataframe. - df = DataFrame( - {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=["key", "data"], - ) - dtype = "string" if using_infer_string else "object" - expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", dtype]) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([str(x) for x in df.dtypes], axis=1) - result = gb.apply(lambda x: df.iloc[1:]) - - tm.assert_frame_equal(result, expected) - - -def test_apply_trivial_fail(using_infer_string): - # GH 20066 - df = DataFrame( - {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=["key", "data"], - ) - dtype = "string" if using_infer_string else "object" - expected = pd.concat([df, df], axis=1, keys=["float64", dtype]) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([str(x) for x in df.dtypes], axis=1, group_keys=True) - result = gb.apply(lambda x: df) - - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "df, group_names", [ @@ -1257,31 +1223,6 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): assert type(val) is date -def test_apply_by_cols_equals_apply_by_rows_transposed(): - # GH 16646 - # Operating on the columns, or transposing and operating on the rows - # should give the same result. There was previously a bug where the - # by_rows operation would work fine, but by_cols would throw a ValueError - - df = DataFrame( - np.random.default_rng(2).random([6, 4]), - columns=MultiIndex.from_product([["A", "B"], [1, 2]]), - ) - - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.T.groupby(axis=0, level=0) - by_rows = gb.apply(lambda x: x.droplevel(axis=0, level=0)) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df.groupby(axis=1, level=0) - by_cols = gb2.apply(lambda x: x.droplevel(axis=1, level=0)) - - tm.assert_frame_equal(by_cols, by_rows.T) - tm.assert_frame_equal(by_cols, df) - - def test_apply_dropna_with_indexed_same(dropna): # GH 38227 # GH#43205 diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index cfd1a4bca9d91..29d82cce44807 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -98,66 +98,3 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) - - -def test_apply_mutate_columns_multiindex(): - # GH 12652 - df = pd.DataFrame( - { - ("C", "julian"): [1, 2, 3], - ("B", "geoffrey"): [1, 2, 3], - ("A", "julian"): [1, 2, 3], - ("B", "julian"): [1, 2, 3], - ("A", "geoffrey"): [1, 2, 3], - ("C", "geoffrey"): [1, 2, 3], - }, - columns=pd.MultiIndex.from_tuples( - [ - ("A", "julian"), - ("A", "geoffrey"), - ("B", "julian"), - ("B", "geoffrey"), - ("C", "julian"), - ("C", "geoffrey"), - ] - ), - ) - - def add_column(grouped): - name = grouped.columns[0][1] - grouped["sum", name] = grouped.sum(axis=1) - return grouped - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=1, axis=1) - result = gb.apply(add_column) - expected = pd.DataFrame( - [ - [1, 1, 1, 3, 1, 1, 1, 3], - [2, 2, 2, 6, 2, 2, 2, 6], - [ - 3, - 3, - 3, - 9, - 3, - 3, - 3, - 9, - ], - ], - columns=pd.MultiIndex.from_tuples( - [ - ("geoffrey", "A", "geoffrey"), - ("geoffrey", "B", "geoffrey"), - ("geoffrey", "C", "geoffrey"), - ("geoffrey", "sum", "geoffrey"), - ("julian", "A", "julian"), - ("julian", "B", "julian"), - ("julian", "C", "julian"), - ("julian", "sum", "julian"), - ] - ), - ) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7db08c8879b0c..727a77f52fe48 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1348,22 +1348,6 @@ def test_groupby_categorical_series_dataframe_consistent(df_cat): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) -def test_groupby_categorical_axis_1(code): - # GH 13420 - df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]}) - cat = Categorical.from_codes(code, categories=list("abc")) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(cat, axis=1, observed=False) - result = gb.mean() - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df.T.groupby(cat, axis=0, observed=False) - expected = gb2.mean().T - tm.assert_frame_equal(result, expected) - - def test_groupby_cat_preserves_structure(observed, ordered): # GH 28787 df = DataFrame( diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 309c4b7b57e84..a34170e9b55db 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -121,19 +121,6 @@ def raise_if_sum_is_zero(x): grouped.filter(raise_if_sum_is_zero) -def test_filter_with_axis_in_groupby(): - # issue 11041 - index = pd.MultiIndex.from_product([range(10), [0, 1]]) - data = DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype="int64") - - msg = "DataFrame.groupby with axis=1" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = data.groupby(level=0, axis=1) - result = gb.filter(lambda x: x.iloc[0, 0] > 10) - expected = data.iloc[:, 12:20] - tm.assert_frame_equal(result, expected) - - def test_filter_bad_shapes(): df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) s = df["B"] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 399cebb0d3706..fa1842388fba8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -39,7 +39,7 @@ def test_repr(): # GH18203 result = repr(Grouper(key="A", level="B")) - expected = "Grouper(key='A', level='B', axis=0, sort=False, dropna=True)" + expected = "Grouper(key='A', level='B', sort=False, dropna=True)" assert result == expected @@ -288,29 +288,6 @@ def test_frame_groupby(tsframe): assert (samething == v).all() -def test_frame_groupby_columns(tsframe): - mapping = {"A": 0, "B": 0, "C": 1, "D": 1} - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = tsframe.groupby(mapping, axis=1) - - # aggregate - aggregated = grouped.aggregate("mean") - assert len(aggregated) == len(tsframe) - assert len(aggregated.columns) == 2 - - # transform - tf = lambda x: x - x.mean() - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - groupedT = tsframe.T.groupby(mapping, axis=0) - tm.assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) - - # iterate - for k, v in grouped: - assert len(v.columns) == 2 - - def test_frame_set_name_single(df): grouped = df.groupby("A") @@ -638,18 +615,6 @@ def test_groupby_as_index_series_scalar(df): tm.assert_frame_equal(result, expected) -def test_groupby_as_index_corner(df, ts): - msg = "as_index=False only valid with DataFrame" - with pytest.raises(TypeError, match=msg): - ts.groupby(lambda x: x.weekday(), as_index=False) - - msg = "as_index=False only valid for axis=0" - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.groupby(lambda x: x.lower(), as_index=False, axis=1) - - def test_groupby_multiple_key(): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), @@ -660,19 +625,6 @@ def test_groupby_multiple_key(): agged = grouped.sum() tm.assert_almost_equal(df.values, agged.values) - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - grouped = df.T.groupby( - [lambda x: x.year, lambda x: x.month, lambda x: x.day], axis=1 - ) - - agged = grouped.agg(lambda x: x.sum()) - tm.assert_index_equal(agged.index, df.columns) - tm.assert_almost_equal(df.T.values, agged.values) - - agged = grouped.agg(lambda x: x.sum()) - tm.assert_almost_equal(df.T.values, agged.values) - def test_groupby_multi_corner(df): # test that having an all-NA column doesn't mess you up @@ -703,14 +655,6 @@ def test_raises_on_nuisance(df): with pytest.raises(TypeError, match=msg): grouped.sum() - # won't work with axis = 1 - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) - msg = "does not support reduction 'sum'" - with pytest.raises(TypeError, match=msg): - grouped.agg(lambda x: x.sum(0, numeric_only=False)) - @pytest.mark.parametrize( "agg_function", @@ -978,24 +922,12 @@ def test_groupby_with_hier_columns(): result = df.groupby(level=0).mean() tm.assert_index_equal(result.columns, columns) - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(level=0, axis=1) - result = gb.mean() - tm.assert_index_equal(result.index, df.index) - result = df.groupby(level=0).agg("mean") tm.assert_index_equal(result.columns, columns) result = df.groupby(level=0).apply(lambda x: x.mean()) tm.assert_index_equal(result.columns, columns) - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(level=0, axis=1) - result = gb.agg(lambda x: x.mean(1)) - tm.assert_index_equal(result.columns, Index(["A", "B"])) - tm.assert_index_equal(result.index, df.index) - # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df["A", "foo"] = "bar" @@ -1997,34 +1929,6 @@ def test_groupby_groups_in_BaseGrouper(): assert result.groups == expected.groups -@pytest.mark.parametrize("group_name", ["x", ["x"]]) -def test_groupby_axis_1(group_name): - # GH 27614 - df = DataFrame( - np.arange(12).reshape(3, 4), index=[0, 1, 0], columns=[10, 20, 10, 20] - ) - df.index.name = "y" - df.columns.name = "x" - - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(group_name, axis=1) - - results = gb.sum() - expected = df.T.groupby(group_name).sum().T - tm.assert_frame_equal(results, expected) - - # test on MI column - iterables = [["bar", "baz", "foo"], ["one", "two"]] - mi = MultiIndex.from_product(iterables=iterables, names=["x", "x1"]) - df = DataFrame(np.arange(18).reshape(3, 6), index=[0, 1, 0], columns=mi) - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(group_name, axis=1) - results = gb.sum() - expected = df.T.groupby(group_name).sum().T - tm.assert_frame_equal(results, expected) - - @pytest.mark.parametrize( "op, expected", [ @@ -2156,42 +2060,27 @@ def test_group_on_empty_multiindex(transformation_func, request): tm.assert_equal(result, expected) -def test_groupby_crash_on_nunique(axis): +def test_groupby_crash_on_nunique(): # Fix following 30253 dti = date_range("2016-01-01", periods=2, name="foo") df = DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) df.columns.names = ("bar", "baz") df.index = dti - axis_number = df._get_axis_number(axis) - if not axis_number: - df = df.T - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - else: - msg = "DataFrame.groupby with axis=1 is deprecated" - - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(axis=axis_number, level=0) + # TODO: Do we need a transpose? + df = df.T + gb = df.groupby(level=0) result = gb.nunique() expected = DataFrame({"A": [1, 2], "D": [1, 1]}, index=dti) expected.columns.name = "bar" - if not axis_number: - expected = expected.T + expected = expected.T tm.assert_frame_equal(result, expected) - if axis_number == 0: - # same thing, but empty columns - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df[[]].groupby(axis=axis_number, level=0) - exp = expected[[]] - else: - # same thing, but empty rows - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df.loc[[]].groupby(axis=axis_number, level=0) - # default for empty when we can't infer a dtype is float64 - exp = expected.loc[[]].astype(np.float64) + # same thing, but empty columns + gb2 = df[[]].groupby(level=0) + exp = expected[[]] res = gb2.nunique() tm.assert_frame_equal(res, exp) @@ -2267,17 +2156,6 @@ def test_subsetting_columns_keeps_attrs(klass, attr, value): assert getattr(result, attr) == getattr(expected, attr) -def test_subsetting_columns_axis_1(): - # GH 37725 - df = DataFrame({"A": [1], "B": [2], "C": [3]}) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - g = df.groupby([0, 0, 1], axis=1) - match = "Cannot subset columns when using axis=1" - with pytest.raises(ValueError, match=match): - g[["A", "B"]].sum() - - @pytest.mark.parametrize("func", ["sum", "any", "shift"]) def test_groupby_column_index_name_lost(func): # GH: 29764 groupby loses index sometimes @@ -2992,29 +2870,6 @@ def test_groupby_ngroup_with_nan(): tm.assert_series_equal(result, expected) -def test_get_group_axis_1(): - # GH#54858 - df = DataFrame( - { - "col1": [0, 3, 2, 3], - "col2": [4, 1, 6, 7], - "col3": [3, 8, 2, 10], - "col4": [1, 13, 6, 15], - "col5": [-4, 5, 6, -7], - } - ) - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - grouped = df.groupby(axis=1, by=[1, 2, 3, 2, 1]) - result = grouped.get_group(1) - expected = DataFrame( - { - "col1": [0, 3, 2, 3], - "col5": [-4, 5, 6, -7], - } - ) - tm.assert_frame_equal(result, expected) - - def test_groupby_ffill_with_duplicated_index(): # GH#43412 df = DataFrame({"a": [1, 2, 3, 4, np.nan, np.nan]}, index=[0, 1, 2, 0, 1, 2]) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 39d1ba207fba7..f8529bf060c88 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -230,13 +230,6 @@ def test_grouper_creation_bug(self): result = g.sum() tm.assert_frame_equal(result, expected) - msg = "Grouper axis keyword is deprecated and will be removed" - with tm.assert_produces_warning(FutureWarning, match=msg): - gpr = Grouper(key="A", axis=0) - g = df.groupby(gpr) - result = g.sum() - tm.assert_frame_equal(result, expected) - msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = g.apply(lambda x: x.sum()) @@ -386,22 +379,15 @@ def test_groupby_categorical_index_and_columns(self, observed): [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]], int ) cat_columns = CategoricalIndex(columns, categories=categories, ordered=True) - df = DataFrame(data=data, columns=cat_columns) - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - result = df.groupby(axis=1, level=0, observed=observed).sum() expected_data = np.array([[4, 2], [4, 2], [4, 2], [4, 2], [4, 2]], int) expected_columns = CategoricalIndex( categories, categories=categories, ordered=True ) - expected = DataFrame(data=expected_data, columns=expected_columns) - tm.assert_frame_equal(result, expected) + # TODO: Why transpose? # test transposed version df = DataFrame(data.T, index=cat_columns) - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(axis=0, level=0, observed=observed).sum() + result = df.groupby(level=0, observed=observed).sum() expected = DataFrame(data=expected_data.T, index=expected_columns) tm.assert_frame_equal(result, expected) @@ -529,18 +515,6 @@ def test_grouping_error_on_multidim_input(self, df): with pytest.raises(ValueError, match=msg): Grouping(df.index, df[["A", "A"]]) - def test_multiindex_passthru(self): - # GH 7997 - # regression from 0.14.1 - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - df.columns = MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) - - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(axis=1, level=[0, 1]) - result = gb.first() - tm.assert_frame_equal(result, df) - def test_multiindex_negative_level(self, multiindex_dataframe_random_data): # GH 13901 result = multiindex_dataframe_random_data.groupby(level=-1).sum() @@ -677,35 +651,20 @@ def test_groupby_level(self, sort, multiindex_dataframe_random_data, df): tm.assert_frame_equal(result0, expected0) tm.assert_frame_equal(result1, expected1) - # axis=1 - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() - result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() - tm.assert_frame_equal(result0, expected0.T) - tm.assert_frame_equal(result1, expected1.T) - # raise exception for non-MultiIndex msg = "level > 0 or level < -1 only valid with MultiIndex" with pytest.raises(ValueError, match=msg): df.groupby(level=1) - def test_groupby_level_index_names(self, axis): + def test_groupby_level_index_names(self): # GH4014 this used to raise ValueError since 'exp'>1 (in py2) df = DataFrame({"exp": ["A"] * 3 + ["B"] * 3, "var1": range(6)}).set_index( "exp" ) - if axis in (1, "columns"): - df = df.T - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - else: - depr_msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.groupby(level="exp", axis=axis) - msg = f"level name foo is not the name of the {df._get_axis_name(axis)}" + df.groupby(level="exp") + msg = "level name foo is not the name of the index" with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.groupby(level="foo", axis=axis) + df.groupby(level="foo") def test_groupby_level_with_nas(self, sort): # GH 17537 @@ -1099,14 +1058,6 @@ def test_multi_iter_frame(self, three_group): groups = {key: gp for key, gp in grouped} # noqa: C416 assert len(groups) == 2 - # axis = 1 - three_levels = three_group.groupby(["A", "B", "C"]).mean() - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - grouped = three_levels.T.groupby(axis=1, level=(1, 2)) - for key, group in grouped: - pass - def test_dictify(self, df): dict(iter(df.groupby("A"))) dict(iter(df.groupby(["A", "B"]))) diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index f839bf156ca00..a3d3f509e186a 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -266,20 +266,6 @@ def test_step(step): tm.assert_frame_equal(result, expected) -def test_column_axis(): - column_group_df = pd.DataFrame( - [[0, 1, 2, 3, 4, 5, 6], [0, 0, 1, 0, 1, 0, 2]], - columns=["A", "B", "C", "D", "E", "F", "G"], - ) - msg = "DataFrame.groupby with axis=1" - with tm.assert_produces_warning(FutureWarning, match=msg): - g = column_group_df.groupby(column_group_df.iloc[1], axis=1) - result = g._positional_selector[1:-1] - expected = column_group_df.iloc[:, [1, 3]] - - tm.assert_frame_equal(result, expected) - - def test_columns_on_iter(): # GitHub issue #44821 df = pd.DataFrame({k: range(10) for k in "ABC"}) diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py index ee7d342472493..3e32031e51138 100644 --- a/pandas/tests/groupby/test_numba.py +++ b/pandas/tests/groupby/test_numba.py @@ -61,13 +61,6 @@ def test_as_index_false_unsupported(self, numba_supported_reductions): with pytest.raises(NotImplementedError, match="as_index=False"): getattr(gb, func)(engine="numba", **kwargs) - def test_axis_1_unsupported(self, numba_supported_reductions): - func, kwargs = numba_supported_reductions - df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) - gb = df.groupby("a", axis=1) - with pytest.raises(NotImplementedError, match="axis=1"): - getattr(gb, func)(engine="numba", **kwargs) - def test_no_engine_doesnt_raise(self): # GH55520 df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 08ce41edfb784..50103011693bc 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -1149,36 +1149,26 @@ def test_apply_to_nullable_integer_returns_float(values, function): "sem", ], ) -@pytest.mark.parametrize("axis", [0, 1]) -def test_regression_allowlist_methods(op, axis, skipna, sort): +def test_regression_allowlist_methods(op, skipna, sort): # GH6944 # GH 17537 # explicitly test the allowlist methods - raw_frame = DataFrame([0]) - if axis == 0: - frame = raw_frame - msg = "The 'axis' keyword in DataFrame.groupby is deprecated and will be" - else: - frame = raw_frame.T - msg = "DataFrame.groupby with axis=1 is deprecated" + frame = DataFrame([0]) - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = frame.groupby(level=0, axis=axis, sort=sort) + grouped = frame.groupby(level=0, sort=sort) if op == "skew": # skew has skipna result = getattr(grouped, op)(skipna=skipna) - expected = frame.groupby(level=0).apply( - lambda h: getattr(h, op)(axis=axis, skipna=skipna) - ) + expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(skipna=skipna)) if sort: - expected = expected.sort_index(axis=axis) + expected = expected.sort_index() tm.assert_frame_equal(result, expected) else: result = getattr(grouped, op)() - expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(axis=axis)) + expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)()) if sort: - expected = expected.sort_index(axis=axis) + expected = expected.sort_index() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 8ef7c2b8ce859..b8891da388695 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -925,7 +925,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze( # check that we will go through the singular_series path # in _wrap_applied_output_series assert gb.ngroups == 1 - assert gb._selected_obj._get_axis(gb.axis).nlevels == 1 + assert gb._selected_obj.index.nlevels == 1 # function that returns a Series msg = "DataFrameGroupBy.apply operated on the grouping columns" diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 3bccacf3dec6f..67bebddaa63ca 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -170,69 +170,9 @@ def test_transform_broadcast(tsframe, ts): for col in tsframe: assert_fp_equal(res[col], agged[col]) - # group columns - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) - msg = "using DataFrameGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.transform(np.mean) - tm.assert_index_equal(result.index, tsframe.index) - tm.assert_index_equal(result.columns, tsframe.columns) - for _, gp in grouped: - agged = gp.mean(1) - res = result.reindex(columns=gp.columns) - for idx in gp.index: - assert_fp_equal(res.xs(idx), agged[idx]) - - -def test_transform_axis_1(request, transformation_func): - # GH 36308 - - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - args = get_groupby_method_args(transformation_func, df) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([0, 0, 1], axis=1) - warn = FutureWarning if transformation_func == "fillna" else None - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=msg): - result = gb.transform(transformation_func, *args) - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=msg): - expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T - - if transformation_func in ["diff", "shift"]: - # Result contains nans, so transpose coerces to float - expected["b"] = expected["b"].astype("int64") - - # cumcount returns Series; the rest are DataFrame - tm.assert_equal(result, expected) - - -def test_transform_axis_1_reducer(request, reduction_func): - # GH#45715 - if reduction_func in ( - "corrwith", - "ngroup", - "nth", - ): - marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986") - request.applymarker(marker) - - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([0, 0, 1], axis=1) - - result = gb.transform(reduction_func) - expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T - tm.assert_equal(result, expected) - def test_transform_axis_ts(tsframe): - # make sure that we are setting the axes - # correctly when on axis=0 or 1 + # make sure that we are setting the axes correctly # in the presence of a non-monotonic indexer # GH12713 @@ -252,14 +192,6 @@ def test_transform_axis_ts(tsframe): expected = grouped.apply(lambda x: x - x.mean(axis=0)) tm.assert_frame_equal(result, expected) - ts = ts.T - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) - result = ts - grouped.transform("mean") - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - tm.assert_frame_equal(result, expected) - # non-monotonic ts = tso.iloc[[1, 0] + list(range(2, len(base)))] grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) @@ -267,14 +199,6 @@ def test_transform_axis_ts(tsframe): expected = grouped.apply(lambda x: x - x.mean(axis=0)) tm.assert_frame_equal(result, expected) - ts = ts.T - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) - result = ts - grouped.transform("mean") - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - tm.assert_frame_equal(result, expected) - def test_transform_dtype(): # GH 9807 @@ -894,38 +818,6 @@ def test_cython_transform_frame_column( tm.assert_series_equal(expected, res2) -def test_transform_with_non_scalar_group(): - # GH 10165 - cols = MultiIndex.from_tuples( - [ - ("syn", "A"), - ("foo", "A"), - ("non", "A"), - ("syn", "C"), - ("foo", "C"), - ("non", "C"), - ("syn", "T"), - ("foo", "T"), - ("non", "T"), - ("syn", "G"), - ("foo", "G"), - ("non", "G"), - ] - ) - df = DataFrame( - np.random.default_rng(2).integers(1, 10, (4, 12)), - columns=cols, - index=["A", "C", "G", "T"], - ) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(axis=1, level=1) - msg = "transform must return a scalar value for each group.*" - with pytest.raises(ValueError, match=msg): - gb.transform(lambda z: z.div(z.sum(axis=1), axis=0)) - - @pytest.mark.parametrize( "cols,expected", [ @@ -1330,7 +1222,7 @@ def func(grp): # Check that the fastpath raises, see _transform_general obj = gb._obj_with_exclusions - gen = gb._grouper.get_iterator(obj, axis=gb.axis) + gen = gb._grouper.get_iterator(obj) fast_path, slow_path = gb._define_paths(func) _, group = next(gen) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 2470aae78d701..abafad5b1d7da 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -426,25 +426,6 @@ def test_boxplot_legacy2_return_type(self): axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") _check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @pytest.mark.parametrize( - "subplots, warn, axes_num, layout", - [[True, UserWarning, 3, (2, 2)], [False, None, 1, (1, 1)]], - ) - def test_boxplot_legacy3(self, subplots, warn, axes_num, layout): - tuples = zip(string.ascii_letters[:10], range(10)) - df = DataFrame( - np.random.default_rng(2).random((10, 3)), - index=MultiIndex.from_tuples(tuples), - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = df.unstack(level=1).groupby(level=0, axis=1) - with tm.assert_produces_warning(warn, check_stacklevel=False): - axes = _check_plot_works( - grouped.boxplot, subplots=subplots, return_type="axes" - ) - _check_axes_shape(axes, axes_num=axes_num, layout=layout) - def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.default_rng(2).normal(166, 20, size=n)) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index dfcdc2ce26bcf..c5ef0f39ece19 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -639,26 +639,6 @@ def test_resample_ohlc_dataframe(unit): # df.columns = ['PRICE', 'PRICE'] -def test_resample_dup_index(): - # GH 4812 - # dup columns with resample raising - df = DataFrame( - np.random.default_rng(2).standard_normal((4, 12)), - index=[2000, 2000, 2000, 2000], - columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)], - ) - df.iloc[3, :] = np.nan - warning_msg = "DataFrame.resample with axis=1 is deprecated." - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - result = df.resample("QE", axis=1).mean() - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean() - expected.columns = [Period(year=2000, quarter=i + 1, freq="Q") for i in range(4)] - tm.assert_frame_equal(result, expected) - - def test_resample_reresample(unit): dti = date_range( start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D" @@ -737,21 +717,6 @@ def test_asfreq_non_unique(unit): ts.asfreq("B") -def test_resample_axis1(unit): - rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) - df = DataFrame( - np.random.default_rng(2).standard_normal((3, len(rng))), - columns=rng, - index=["a", "b", "c"], - ) - - warning_msg = "DataFrame.resample with axis=1 is deprecated." - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - result = df.resample("ME", axis=1).mean() - expected = df.T.resample("ME").mean().T - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("freq", ["min", "5min", "15min", "30min", "4h", "12h"]) def test_resample_anchored_ticks(freq, unit): # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 12abd1c98784b..17c286c4651e6 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -35,13 +35,13 @@ def test_frame(dti, _test_series): def test_str(_test_series): r = _test_series.resample("h") assert ( - "DatetimeIndexResampler [freq=, axis=0, closed=left, " + "DatetimeIndexResampler [freq=, closed=left, " "label=left, convention=start, origin=start_day]" in str(r) ) r = _test_series.resample("h", origin="2000-01-01") assert ( - "DatetimeIndexResampler [freq=, axis=0, closed=left, " + "DatetimeIndexResampler [freq=, closed=left, " "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r) ) @@ -620,26 +620,6 @@ def test_agg_specificationerror_invalid_names(cases): cases[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) -@pytest.mark.parametrize( - "func", [["min"], ["mean", "max"], {"A": "sum"}, {"A": "prod", "B": "median"}] -) -def test_multi_agg_axis_1_raises(func): - # GH#46904 - - index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") - index.name = "date" - df = DataFrame( - np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index - ).T - warning_msg = "DataFrame.resample with axis=1 is deprecated." - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - res = df.resample("ME", axis=1) - with pytest.raises( - NotImplementedError, match="axis other than 0 is not supported" - ): - res.agg(func) - - def test_agg_nested_dicts(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" @@ -1047,37 +1027,6 @@ def test_args_kwargs_depr(method, raises): func(*args, 1, 2, 3, 4) -def test_df_axis_param_depr(): - index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") - index.name = "date" - df = DataFrame( - np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index - ).T - - # Deprecation error when axis=1 is explicitly passed - warning_msg = "DataFrame.resample with axis=1 is deprecated." - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - df.resample("ME", axis=1) - - # Deprecation error when axis=0 is explicitly passed - df = df.T - warning_msg = ( - "The 'axis' keyword in DataFrame.resample is deprecated and " - "will be removed in a future version." - ) - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - df.resample("ME", axis=0) - - -def test_series_axis_param_depr(_test_series): - warning_msg = ( - "The 'axis' keyword in Series.resample is " - "deprecated and will be removed in a future version." - ) - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - _test_series.resample("h", axis=0) - - def test_resample_empty(): # GH#52484 df = DataFrame( diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 3f9340b800eae..c5e202f36659b 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -296,7 +296,7 @@ def test_repr(): # GH18203 result = repr(Grouper(key="A", freq="h")) expected = ( - "TimeGrouper(key='A', freq=, axis=0, sort=True, dropna=True, " + "TimeGrouper(key='A', freq=, sort=True, dropna=True, " "closed='left', label='left', how='mean', " "convention='e', origin='start_day')" ) @@ -304,7 +304,7 @@ def test_repr(): result = repr(Grouper(key="A", freq="h", origin="2000-01-01")) expected = ( - "TimeGrouper(key='A', freq=, axis=0, sort=True, dropna=True, " + "TimeGrouper(key='A', freq=, sort=True, dropna=True, " "closed='left', label='left', how='mean', " "convention='e', origin=Timestamp('2000-01-01 00:00:00'))" ) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 6644ec82fab17..fda51b157cd75 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -28,16 +28,6 @@ def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): expected = ymd["A"].groupby(level="month").transform("sum") tm.assert_series_equal(result, expected, check_names=False) - # axis=1 - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = ymd.T.groupby("month", axis=1) - - month_sums = gb.sum() - result = month_sums.reindex(columns=ymd.index, level=1) - expected = ymd.groupby(level="month").transform("sum").T - tm.assert_frame_equal(result, expected) - def test_reindex(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data @@ -97,27 +87,6 @@ def test_groupby_corner(self): # should work df.groupby(level="three") - def test_groupby_level_no_obs(self): - # #1697 - midx = MultiIndex.from_tuples( - [ - ("f1", "s1"), - ("f1", "s2"), - ("f2", "s1"), - ("f2", "s2"), - ("f3", "s1"), - ("f3", "s2"), - ] - ) - df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx) - df1 = df.loc(axis=1)[df.columns.map(lambda u: u[0] in ["f2", "f3"])] - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = df1.groupby(axis=1, level=0) - result = grouped.sum() - assert (result.columns == ["f2", "f3"]).all() - def test_setitem_with_expansion_multiindex_columns( self, multiindex_year_month_day_dataframe_random_data ): diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index fe2da210c6fe9..b4d555203212e 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -127,19 +127,6 @@ def test_agg(step): tm.assert_frame_equal(result, expected, check_like=True) -@pytest.mark.parametrize( - "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}] -) -def test_multi_axis_1_raises(func): - # GH#46904 - df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]}) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - r = df.rolling(window=3, axis=1) - with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"): - r.agg(func) - - def test_agg_apply(raw): # passed lambda df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) @@ -352,32 +339,6 @@ def test_dont_modify_attributes_after_methods( assert result == expected -def test_centered_axis_validation(step): - # ok - msg = "The 'axis' keyword in Series.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - Series(np.ones(10)).rolling(window=3, center=True, axis=0, step=step).mean() - - # bad axis - msg = "No axis named 1 for object type Series" - with pytest.raises(ValueError, match=msg): - Series(np.ones(10)).rolling(window=3, center=True, axis=1, step=step).mean() - - # ok ok - df = DataFrame(np.ones((10, 10))) - msg = "The 'axis' keyword in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.rolling(window=3, center=True, axis=0, step=step).mean() - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.rolling(window=3, center=True, axis=1, step=step).mean() - - # bad axis - msg = "No axis named 2 for object type DataFrame" - with pytest.raises(ValueError, match=msg): - (df.rolling(window=3, center=True, axis=2, step=step).mean()) - - def test_rolling_min_min_periods(step): a = Series([1, 2, 3, 4, 5]) result = a.rolling(window=100, min_periods=1, step=step).min() diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index 136f81632cb0a..2398713585cfb 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -316,13 +316,3 @@ def test_center_reindex_frame(raw): ) frame_rs = frame.rolling(window=25, min_periods=minp, center=True).apply(f, raw=raw) tm.assert_frame_equal(frame_xp, frame_rs) - - -def test_axis1(raw): - # GH 45912 - df = DataFrame([1, 2]) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=1, axis=1).apply(np.sum, raw=raw) - expected = DataFrame([1.0, 2.0]) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index d7c72105a673b..2e2cfa156019f 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -158,56 +158,6 @@ def test_ewma_times_adjust_false_raises(): ) -@pytest.mark.parametrize( - "func, expected", - [ - [ - "mean", - DataFrame( - { - 0: range(5), - 1: range(4, 9), - 2: [7.428571, 9, 10.571429, 12.142857, 13.714286], - }, - dtype=float, - ), - ], - [ - "std", - DataFrame( - { - 0: [np.nan] * 5, - 1: [4.242641] * 5, - 2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788], - } - ), - ], - [ - "var", - DataFrame( - { - 0: [np.nan] * 5, - 1: [18.0] * 5, - 2: [21.428571, 27, 33.428571, 40.714286, 48.857143], - } - ), - ], - ], -) -def test_float_dtype_ewma(func, expected, float_numpy_dtype): - # GH#42452 - - df = DataFrame( - {0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_numpy_dtype - ) - msg = "Support for axis=1 in DataFrame.ewm is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - e = df.ewm(alpha=0.5, axis=1) - result = getattr(e, func)() - - tm.assert_frame_equal(result, expected) - - def test_times_string_col_raises(): # GH 43265 df = DataFrame( diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 9174307cec5d1..6d452b27f3654 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -79,23 +79,14 @@ def test_missing_minp_zero(): tm.assert_series_equal(result, expected) -def test_expanding_axis(axis): +def test_expanding(): # see gh-23372. df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis) - if axis == 0: - msg = "The 'axis' keyword in DataFrame.expanding is deprecated" - expected = DataFrame( - {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} - ) - else: - # axis == 1 - msg = "Support for axis=1 in DataFrame.expanding is deprecated" - expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.expanding(3, axis=axis).sum() + expected = DataFrame( + {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} + ) + result = df.expanding(3).sum() tm.assert_frame_equal(result, expected) @@ -329,9 +320,7 @@ def test_expanding_corr_pairwise(frame): def test_expanding_func(func, static_comp, frame_or_series): data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) - msg = "The 'axis' keyword in (Series|DataFrame).expanding is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - obj = data.expanding(min_periods=1, axis=0) + obj = data.expanding(min_periods=1) result = getattr(obj, func)() assert isinstance(result, frame_or_series) @@ -355,33 +344,26 @@ def test_expanding_func(func, static_comp, frame_or_series): def test_expanding_min_periods(func, static_comp): ser = Series(np.random.default_rng(2).standard_normal(50)) - msg = "The 'axis' keyword in Series.expanding is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser.expanding(min_periods=30, axis=0), func)() + result = getattr(ser.expanding(min_periods=30), func)() assert result[:29].isna().all() tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) # min_periods is working correctly - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser.expanding(min_periods=15, axis=0), func)() + result = getattr(ser.expanding(min_periods=15), func)() assert isna(result.iloc[13]) assert notna(result.iloc[14]) ser2 = Series(np.random.default_rng(2).standard_normal(20)) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser2.expanding(min_periods=5, axis=0), func)() + result = getattr(ser2.expanding(min_periods=5), func)() assert isna(result[3]) assert notna(result[4]) # min_periods=0 - with tm.assert_produces_warning(FutureWarning, match=msg): - result0 = getattr(ser.expanding(min_periods=0, axis=0), func)() - with tm.assert_produces_warning(FutureWarning, match=msg): - result1 = getattr(ser.expanding(min_periods=1, axis=0), func)() + result0 = getattr(ser.expanding(min_periods=0), func)() + result1 = getattr(ser.expanding(min_periods=1), func)() tm.assert_almost_equal(result0, result1) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser.expanding(min_periods=1, axis=0), func)() + result = getattr(ser.expanding(min_periods=1), func)() tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 139e1ff7f65fd..650eb911e410b 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -328,7 +328,6 @@ def f(x): def test_table_method_rolling_methods( self, - axis, nogil, parallel, nopython, @@ -340,16 +339,14 @@ def test_table_method_rolling_methods( engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} df = DataFrame(np.eye(3)) - roll_table = df.rolling(2, method="table", axis=axis, min_periods=0, step=step) + roll_table = df.rolling(2, method="table", min_periods=0, step=step) if method in ("var", "std"): with pytest.raises(NotImplementedError, match=f"{method} not supported"): getattr(roll_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) else: - roll_single = df.rolling( - 2, method="single", axis=axis, min_periods=0, step=step - ) + roll_single = df.rolling(2, method="single", min_periods=0, step=step) result = getattr(roll_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) @@ -358,19 +355,19 @@ def test_table_method_rolling_methods( ) tm.assert_frame_equal(result, expected) - def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython, step): + def test_table_method_rolling_apply(self, nogil, parallel, nopython, step): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): return np.sum(x, axis=0) + 1 df = DataFrame(np.eye(3)) - result = df.rolling( - 2, method="table", axis=axis, min_periods=0, step=step - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") - expected = df.rolling( - 2, method="single", axis=axis, min_periods=0, step=step - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") + result = df.rolling(2, method="table", min_periods=0, step=step).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + expected = df.rolling(2, method="single", min_periods=0, step=step).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) tm.assert_frame_equal(result, expected) def test_table_method_rolling_weighted_mean(self, step): @@ -393,37 +390,37 @@ def weighted_mean(x): )[::step] tm.assert_frame_equal(result, expected) - def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython): + def test_table_method_expanding_apply(self, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): return np.sum(x, axis=0) + 1 df = DataFrame(np.eye(3)) - result = df.expanding(method="table", axis=axis).apply( + result = df.expanding(method="table").apply( f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) - expected = df.expanding(method="single", axis=axis).apply( + expected = df.expanding(method="single").apply( f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) def test_table_method_expanding_methods( - self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators + self, nogil, parallel, nopython, arithmetic_numba_supported_operators ): method, kwargs = arithmetic_numba_supported_operators engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} df = DataFrame(np.eye(3)) - expand_table = df.expanding(method="table", axis=axis) + expand_table = df.expanding(method="table") if method in ("var", "std"): with pytest.raises(NotImplementedError, match=f"{method} not supported"): getattr(expand_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) else: - expand_single = df.expanding(method="single", axis=axis) + expand_single = df.expanding(method="single") result = getattr(expand_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) @@ -434,15 +431,15 @@ def test_table_method_expanding_methods( @pytest.mark.parametrize("data", [np.eye(3), np.ones((2, 3)), np.ones((3, 2))]) @pytest.mark.parametrize("method", ["mean", "sum"]) - def test_table_method_ewm(self, data, method, axis, nogil, parallel, nopython): + def test_table_method_ewm(self, data, method, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} df = DataFrame(data) - result = getattr(df.ewm(com=1, method="table", axis=axis), method)( + result = getattr(df.ewm(com=1, method="table"), method)( engine_kwargs=engine_kwargs, engine="numba" ) - expected = getattr(df.ewm(com=1, method="single", axis=axis), method)( + expected = getattr(df.ewm(com=1, method="single"), method)( engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 0ca6bf0de94dd..fda631987255a 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -21,8 +21,6 @@ Timestamp, date_range, period_range, - to_datetime, - to_timedelta, ) import pandas._testing as tm from pandas.api.indexers import BaseIndexer @@ -594,39 +592,20 @@ def test_multi_index_names(): assert result.index.names == [None, "1", "2"] -def test_rolling_axis_sum(axis): +def test_rolling_axis_sum(): # see gh-23372. df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis) - - if axis == 0: - msg = "The 'axis' keyword in DataFrame.rolling" - expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) - else: - # axis == 1 - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(3, axis=axis).sum() + expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) + result = df.rolling(3).sum() tm.assert_frame_equal(result, expected) -def test_rolling_axis_count(axis): +def test_rolling_axis_count(): # see gh-26055 df = DataFrame({"x": range(3), "y": range(3)}) - axis = df._get_axis_number(axis) - - if axis in [0, "index"]: - msg = "The 'axis' keyword in DataFrame.rolling" - expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) - else: - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(2, axis=axis, min_periods=0).count() + expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) + result = df.rolling(2, min_periods=0).count() tm.assert_frame_equal(result, expected) @@ -639,21 +618,14 @@ def test_readonly_array(): tm.assert_series_equal(result, expected) -def test_rolling_datetime(axis, tz_naive_fixture): +def test_rolling_datetime(tz_naive_fixture): # GH-28192 tz = tz_naive_fixture df = DataFrame( {i: [1] * 2 for i in date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} ) - if axis in [0, "index"]: - msg = "The 'axis' keyword in DataFrame.rolling" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.T.rolling("2D", axis=axis).sum().T - else: - msg = "Support for axis=1 in DataFrame.rolling" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling("2D", axis=axis).sum() + result = df.T.rolling("2D").sum().T expected = DataFrame( { **{ @@ -1065,75 +1037,6 @@ def test_rolling_numerical_too_large_numbers(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - ("func", "value"), - [("sum", 2.0), ("max", 1.0), ("min", 1.0), ("mean", 1.0), ("median", 1.0)], -) -def test_rolling_mixed_dtypes_axis_1(func, value): - # GH: 20649 - df = DataFrame(1, index=[1, 2], columns=["a", "b", "c"]) - df["c"] = 1.0 - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - roll = df.rolling(window=2, min_periods=1, axis=1) - result = getattr(roll, func)() - expected = DataFrame( - {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, - index=[1, 2], - ) - tm.assert_frame_equal(result, expected) - - -def test_rolling_axis_one_with_nan(): - # GH: 35596 - df = DataFrame( - [ - [0, 1, 2, 4, np.nan, np.nan, np.nan], - [0, 1, 2, np.nan, np.nan, np.nan, np.nan], - [0, 2, 2, np.nan, 2, np.nan, 1], - ] - ) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=7, min_periods=1, axis="columns").sum() - expected = DataFrame( - [ - [0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], - [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], - [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0], - ] - ) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "value", - ["test", to_datetime("2019-12-31"), to_timedelta("1 days 06:05:01.00003")], -) -def test_rolling_axis_1_non_numeric_dtypes(value): - # GH: 20649 - df = DataFrame({"a": [1, 2]}) - df["b"] = value - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=2, min_periods=1, axis=1).sum() - expected = DataFrame({"a": [1.0, 2.0]}) - tm.assert_frame_equal(result, expected) - - -def test_rolling_on_df_transposed(): - # GH: 32724 - df = DataFrame({"A": [1, None], "B": [4, 5], "C": [7, 8]}) - expected = DataFrame({"A": [1.0, np.nan], "B": [5.0, 5.0], "C": [11.0, 13.0]}) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(min_periods=1, window=2, axis=1).sum() - tm.assert_frame_equal(result, expected) - - result = df.T.rolling(min_periods=1, window=2).sum().T - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( ("index", "window"), [ @@ -1576,56 +1479,6 @@ def test_rolling_zero_window(): tm.assert_series_equal(result, expected) -def test_rolling_float_dtype(float_numpy_dtype): - # GH#42452 - df = DataFrame({"A": range(5), "B": range(10, 15)}, dtype=float_numpy_dtype) - expected = DataFrame( - {"A": [np.nan] * 5, "B": range(10, 20, 2)}, - dtype=float_numpy_dtype, - ) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(2, axis=1).sum() - tm.assert_frame_equal(result, expected, check_dtype=False) - - -def test_rolling_numeric_dtypes(): - # GH#41779 - df = DataFrame(np.arange(40).reshape(4, 10), columns=list("abcdefghij")).astype( - { - "a": "float16", - "b": "float32", - "c": "float64", - "d": "int8", - "e": "int16", - "f": "int32", - "g": "uint8", - "h": "uint16", - "i": "uint32", - "j": "uint64", - } - ) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=2, min_periods=1, axis=1).min() - expected = DataFrame( - { - "a": range(0, 40, 10), - "b": range(0, 40, 10), - "c": range(1, 40, 10), - "d": range(2, 40, 10), - "e": range(3, 40, 10), - "f": range(4, 40, 10), - "g": range(5, 40, 10), - "h": range(6, 40, 10), - "i": range(7, 40, 10), - "j": range(8, 40, 10), - }, - dtype="float64", - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("window", [1, 3, 10, 20]) @pytest.mark.parametrize("method", ["min", "max", "average"]) @pytest.mark.parametrize("pct", [True, False]) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index bd0fadeb3e475..820b0134cc577 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -689,17 +689,11 @@ def test_rolling_on_multi_index_level(self): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]]) -def test_nat_axis_error(msg, axis): +def test_nat_axis_error(): idx = [Timestamp("2020"), NaT] - kwargs = {"columns" if axis == 1 else "index": idx} - df = DataFrame(np.eye(2), **kwargs) - warn_msg = "The 'axis' keyword in DataFrame.rolling is deprecated" - if axis == 1: - warn_msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with pytest.raises(ValueError, match=f"{msg} values must not have NaT"): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - df.rolling("D", axis=axis).mean() + df = DataFrame(np.eye(2), index=idx) + with pytest.raises(ValueError, match="index values must not have NaT"): + df.rolling("D").mean() @td.skip_if_no("pyarrow") diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 5c785ed3fccb2..574dfc34b6d26 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -668,20 +668,3 @@ def test_weighted_var_big_window_no_segfault(win_types, center): expected = Series(np.nan) tm.assert_series_equal(result, expected) - - -def test_rolling_center_axis_1(): - pytest.importorskip("scipy") - df = DataFrame( - {"a": [1, 1, 0, 0, 0, 1], "b": [1, 0, 0, 1, 0, 0], "c": [1, 0, 0, 1, 0, 1]} - ) - - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=3, axis=1, win_type="boxcar", center=True).sum() - - expected = DataFrame( - {"a": [np.nan] * 6, "b": [3.0, 1.0, 0.0, 2.0, 0.0, 2.0], "c": [np.nan] * 6} - ) - - tm.assert_frame_equal(result, expected, check_dtype=True) From d206af0e7fdb11514408f4e3eb8b59a98427005c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 31 Jan 2024 17:27:48 -0500 Subject: [PATCH 2/3] More removals and cleanups --- pandas/core/groupby/groupby.py | 17 ++------- pandas/core/groupby/ops.py | 53 ++++++++------------------- pandas/tests/groupby/test_groupby.py | 1 - pandas/tests/groupby/test_grouping.py | 1 - 4 files changed, 18 insertions(+), 54 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8e6f7a166375c..c4ae47348a64c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1124,9 +1124,7 @@ def get_group(self, name) -> DataFrame | Series: inds = self._get_index(name) if not len(inds): raise KeyError(name) - - indexer = inds if self.axis == 0 else (slice(None), inds) - return self._selected_obj.iloc[indexer] + return self._selected_obj.iloc[inds] @final def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: @@ -1819,7 +1817,7 @@ def _python_apply_general( Series or DataFrame data after applying f """ - values, mutated = self._grouper.apply_groupwise(f, data, axis=0) + values, mutated = self._grouper.apply_groupwise(f, data) if not_indexed_same is None: not_indexed_same = mutated @@ -3725,13 +3723,6 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: Provided integer column is ignored and excluded from result since an integer index is not used to calculate the rolling window. - axis : int or str, default 0 - If ``0`` or ``'index'``, roll across the rows. - - If ``1`` or ``'columns'``, roll across the columns. - - For `Series` this parameter is unused and defaults to 0. - closed : str, default None If ``'right'``, the first point in the window is excluded from calculations. @@ -3801,8 +3792,6 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: """ from pandas.core.window import RollingGroupby - assert kwargs.get("axis", 0) == 0 - return RollingGroupby( self._selected_obj, *args, @@ -4299,7 +4288,7 @@ def quantile( """ mgr = self._get_data_to_aggregate(numeric_only=numeric_only, name="quantile") obj = self._wrap_agged_manager(mgr) - splitter = self._grouper._get_splitter(obj, axis=0) + splitter = self._grouper._get_splitter(obj) sdata = splitter._sorted_data starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 0693fb5104b78..632ff7356d1c7 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -618,13 +618,12 @@ def get_iterator(self, data: NDFrameT) -> Iterator[tuple[Hashable, NDFrameT]]: yield from zip(keys, splitter) @final - def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: + def _get_splitter(self, data: NDFrame) -> DataSplitter: """ Returns ------- Generator yielding subsetted objects """ - assert axis == 0 ids, _, ngroups = self.group_info return _get_splitter( data, @@ -632,7 +631,6 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: ngroups, sorted_ids=self._sorted_ids, sort_idx=self._sort_idx, - axis=axis, ) @final @@ -878,7 +876,7 @@ def _aggregate_series_pure_python( result = np.empty(ngroups, dtype="O") initialized = False - splitter = self._get_splitter(obj, axis=0) + splitter = self._get_splitter(obj) for i, group in enumerate(splitter): res = func(group) @@ -895,11 +893,10 @@ def _aggregate_series_pure_python( @final def apply_groupwise( - self, f: Callable, data: DataFrame | Series, axis: AxisInt = 0 + self, f: Callable, data: DataFrame | Series ) -> tuple[list, bool]: - assert axis == 0 mutated = False - splitter = self._get_splitter(data, axis=axis) + splitter = self._get_splitter(data) group_keys = self.group_keys_seq result_values = [] @@ -917,7 +914,7 @@ def apply_groupwise( # group might be modified group_axes = group.axes res = f(group) - if not mutated and not _is_indexed_like(res, group_axes, axis): + if not mutated and not _is_indexed_like(res, group_axes): mutated = True result_values.append(res) # getattr pattern for __name__ is needed for functools.partial objects @@ -1024,7 +1021,7 @@ def codes_info(self) -> npt.NDArray[np.intp]: ids = ids[sorter] return ids - def get_iterator(self, data: NDFrame, axis: AxisInt = 0): + def get_iterator(self, data: NDFrame): """ Groupby iterator @@ -1033,13 +1030,7 @@ def get_iterator(self, data: NDFrame, axis: AxisInt = 0): Generator yielding sequence of (name, subsetted object) for each group """ - assert axis == 0 - if axis == 0: - slicer = lambda start, edge: data.iloc[start:edge] - else: - slicer = lambda start, edge: data.iloc[:, start:edge] - - length = len(data.axes[axis]) + slicer = lambda start, edge: data.iloc[start:edge] start = 0 for edge, label in zip(self.bins, self.binlabels): @@ -1047,7 +1038,7 @@ def get_iterator(self, data: NDFrame, axis: AxisInt = 0): yield label, slicer(start, edge) start = edge - if start < length: + if start < len(data): yield self.binlabels[-1], slicer(start, None) @cache_readonly @@ -1111,14 +1102,13 @@ def groupings(self) -> list[grouper.Grouping]: return [ping] -def _is_indexed_like(obj, axes, axis: AxisInt) -> bool: - assert axis == 0 +def _is_indexed_like(obj, axes) -> bool: if isinstance(obj, Series): if len(axes) > 1: return False - return obj.axes[axis].equals(axes[axis]) + return obj.index.equals(axes[0]) elif isinstance(obj, DataFrame): - return obj.axes[axis].equals(axes[axis]) + return obj.index.equals(axes[0]) return False @@ -1136,9 +1126,7 @@ def __init__( *, sort_idx: npt.NDArray[np.intp], sorted_ids: npt.NDArray[np.intp], - axis: AxisInt = 0, ) -> None: - assert axis == 0 self.data = data self.labels = ensure_platform_int(labels) # _should_ already be np.intp self.ngroups = ngroups @@ -1146,9 +1134,6 @@ def __init__( self._slabels = sorted_ids self._sort_idx = sort_idx - self.axis = axis - assert isinstance(axis, int), axis - def __iter__(self) -> Iterator: sdata = self._sorted_data @@ -1164,7 +1149,7 @@ def __iter__(self) -> Iterator: @cache_readonly def _sorted_data(self) -> NDFrameT: - return self.data.take(self._sort_idx, axis=self.axis) + return self.data.take(self._sort_idx, axis=0) def _chop(self, sdata, slice_obj: slice) -> NDFrame: raise AbstractMethodError(self) @@ -1182,12 +1167,8 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: class FrameSplitter(DataSplitter): def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # Fastpath equivalent to: - # if self.axis == 0: - # return sdata.iloc[slice_obj] - # else: - # return sdata.iloc[:, slice_obj] - assert self.axis == 0 - mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + # return sdata.iloc[slice_obj] + mgr = sdata._mgr.get_slice(slice_obj, axis=1) df = sdata._constructor_from_mgr(mgr, axes=mgr.axes) return df.__finalize__(sdata, method="groupby") @@ -1199,15 +1180,11 @@ def _get_splitter( *, sort_idx: npt.NDArray[np.intp], sorted_ids: npt.NDArray[np.intp], - axis: AxisInt = 0, ) -> DataSplitter: - assert axis == 0 if isinstance(data, Series): klass: type[DataSplitter] = SeriesSplitter else: # i.e. DataFrame klass = FrameSplitter - return klass( - data, labels, ngroups, sort_idx=sort_idx, sorted_ids=sorted_ids, axis=axis - ) + return klass(data, labels, ngroups, sort_idx=sort_idx, sorted_ids=sorted_ids) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index fa1842388fba8..a06d104e7e44c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2067,7 +2067,6 @@ def test_groupby_crash_on_nunique(): df.columns.names = ("bar", "baz") df.index = dti - # TODO: Do we need a transpose? df = df.T gb = df.groupby(level=0) result = gb.nunique() diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index f8529bf060c88..841dd29edab10 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -384,7 +384,6 @@ def test_groupby_categorical_index_and_columns(self, observed): categories, categories=categories, ordered=True ) - # TODO: Why transpose? # test transposed version df = DataFrame(data.T, index=cat_columns) result = df.groupby(level=0, observed=observed).sum() From b465eae24d0ad3ce36bb13a5dd465a5c156ac3ce Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 31 Jan 2024 21:37:03 -0500 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2f2672a8dfb15..c5ac2a800223b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,7 +102,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) -- Removed ``axis`` argument from :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`51203`) +- Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) - Removed the ``ArrayManager`` (:issue:`55043`)