diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 5bb87b8bb2663..9a5fc1c607f6a 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -454,10 +454,10 @@ def setup(self, axis): ) def time_count_level_multi(self, axis): - self.df.count(axis=axis, level=1) + self.df.count(axis=axis) def time_count_level_mixed_dtypes_multi(self, axis): - self.df_mixed.count(axis=axis, level=1) + self.df_mixed.count(axis=axis) class Apply: diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 19fa7f7a06cf2..09244b31fbba7 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -23,10 +23,10 @@ def time_op(self, op, dtype, axis): class FrameMultiIndexOps: - params = ([0, 1, [0, 1]], ops) - param_names = ["level", "op"] + params = [ops] + param_names = ["op"] - def setup(self, level, op): + def setup(self, op): levels = [np.arange(10), np.arange(100), np.arange(100)] codes = [ np.arange(10).repeat(10000), @@ -37,8 +37,8 @@ def setup(self, level, op): df = pd.DataFrame(np.random.randn(len(index), 4), index=index) self.df_func = getattr(df, op) - def time_op(self, level, op): - self.df_func(level=level) + def time_op(self, op): + self.df_func() class SeriesOps: @@ -56,10 +56,10 @@ def time_op(self, op, dtype): class SeriesMultiIndexOps: - params = ([0, 1, [0, 1]], ops) - param_names = ["level", "op"] + params = [ops] + param_names = ["op"] - def setup(self, level, op): + def setup(self, op): levels = [np.arange(10), np.arange(100), np.arange(100)] codes = [ np.arange(10).repeat(10000), @@ -70,8 +70,8 @@ def setup(self, level, op): s = pd.Series(np.random.randn(len(index)), index=index) self.s_func = getattr(s, op) - def time_op(self, level, op): - self.s_func(level=level) + def time_op(self, op): + self.s_func() class Rank: diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst index fd4946c9765e1..bb7beef449d93 
100644 --- a/doc/source/whatsnew/v0.15.2.rst +++ b/doc/source/whatsnew/v0.15.2.rst @@ -154,11 +154,13 @@ Other enhancements: - ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`): - .. ipython:: python - :okwarning: + .. code-block:: python - s = pd.Series([False, True, False], index=[0, 0, 1]) - s.any(level=0) + >>> s = pd.Series([False, True, False], index=[0, 0, 1]) + >>> s.any(level=0) + 0 True + 1 False + dtype: bool - ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`): diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 151d853166563..b4ec75db08470 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -421,6 +421,7 @@ Removal of prior version deprecations/changes - Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`) - Removed :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`) - Removed :attr:`Rolling.is_datetimelike` (:issue:`38963`) +- Removed the ``level`` keyword in :class:`DataFrame` and :class:`Series` aggregations; use ``groupby`` instead (:issue:`39983`) - Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`) - Removed deprecated :attr:`NaT.freq` (:issue:`45071`) - Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1627a7add25ed..507e14c5616a2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -118,7 +118,6 @@ maybe_downcast_to_dtype, ) from pandas.core.dtypes.common import ( - ensure_platform_int, infer_dtype_from_object, is_1d_only_ea_dtype, is_bool_dtype, @@ -10331,7 +10330,7 @@ def c(x): # ---------------------------------------------------------------------- # ndarray-like stats methods - def count(self, axis: Axis 
= 0, level: Level = None, numeric_only: bool = False): + def count(self, axis: Axis = 0, numeric_only: bool = False): """ Count non-NA cells for each column or row. @@ -10343,10 +10342,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False) axis : {0 or 'index', 1 or 'columns'}, default 0 If 0 or 'index' counts are generated for each column. If 1 or 'columns' counts are generated for each row. - level : int or str, optional - If the axis is a `MultiIndex` (hierarchical), count along a - particular `level`, collapsing into a `DataFrame`. - A `str` specifies the level name. numeric_only : bool, default False Include only `float`, `int` or `boolean` data. @@ -10400,16 +10395,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False) dtype: int64 """ axis = self._get_axis_number(axis) - if level is not None: - warnings.warn( - "Using the level keyword in DataFrame and Series aggregations is " - "deprecated and will be removed in a future version. Use groupby " - "instead. df.count(level=1) should use df.groupby(level=1).count().", - FutureWarning, - stacklevel=find_stack_level(), - ) - res = self._count_level(level, axis=axis, numeric_only=numeric_only) - return res.__finalize__(self, method="count") if numeric_only: frame = self._get_numeric_data() @@ -10434,53 +10419,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False) return result.astype("int64").__finalize__(self, method="count") - def _count_level(self, level: Level, axis: AxisInt = 0, numeric_only: bool = False): - if numeric_only: - frame = self._get_numeric_data() - else: - frame = self - - count_axis = frame._get_axis(axis) - agg_axis = frame._get_agg_axis(axis) - - if not isinstance(count_axis, MultiIndex): - raise TypeError( - f"Can only count levels on hierarchical {self._get_axis_name(axis)}." 
- ) - - # Mask NaNs: Mask rows or columns where the index level is NaN, and all - # values in the DataFrame that are NaN - if frame._is_mixed_type: - # Since we have mixed types, calling notna(frame.values) might - # upcast everything to object - values_mask = notna(frame).values - else: - # But use the speedup when we have homogeneous dtypes - values_mask = notna(frame.values) - - index_mask = notna(count_axis.get_level_values(level=level)) - if axis == 1: - mask = index_mask & values_mask - else: - mask = index_mask.reshape(-1, 1) & values_mask - - if isinstance(level, int): - level_number = level - else: - level_number = count_axis._get_level_number(level) - - level_name = count_axis._names[level_number] - level_index = count_axis.levels[level_number]._rename(name=level_name) - level_codes = ensure_platform_int(count_axis.codes[level_number]) - counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis) - - if axis == 1: - result = self._constructor(counts, index=agg_axis, columns=level_index) - else: - result = self._constructor(counts, index=level_index, columns=agg_axis) - - return result - def _reduce( self, op, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d26a11eae9f7f..8bb7b98f39442 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10774,25 +10774,6 @@ def pct_change( rs = rs.reindex_like(data) return rs.__finalize__(self, method="pct_change") - @final - def _agg_by_level( - self, - name: str, - axis: Axis = 0, - level: Level = 0, - skipna: bool_t = True, - **kwargs, - ): - if axis is None: - raise ValueError("Must specify 'axis' when aggregating by level.") - grouped = self.groupby(level=level, axis=axis, sort=False) - if hasattr(grouped, name) and skipna: - return getattr(grouped, name)(**kwargs) - axis = self._get_axis_number(axis) - method = getattr(type(self), name) - applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs) - return grouped.aggregate(applyf) - @final def _logical_func( 
self, @@ -10801,24 +10782,10 @@ def _logical_func( axis: Axis = 0, bool_only: bool_t = False, skipna: bool_t = True, - level: Level | None = None, **kwargs, ) -> Series | bool_t: nv.validate_logical_func((), kwargs, fname=name) validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if level is not None: - warnings.warn( - "Using the level keyword in DataFrame and Series aggregations is " - "deprecated and will be removed in a future version. Use groupby " - "instead. df.any(level=1) should use df.groupby(level=1).any()", - FutureWarning, - stacklevel=find_stack_level(), - ) - if bool_only: - raise NotImplementedError( - "Option bool_only is not implemented with option level." - ) - return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) if self.ndim > 1 and axis is None: # Reduce along one dimension then the other, to simplify DataFrame._reduce @@ -10855,11 +10822,10 @@ def any( axis: Axis = 0, bool_only: bool_t = False, skipna: bool_t = True, - level: Level | None = None, **kwargs, ) -> DataFrame | Series | bool_t: return self._logical_func( - "any", nanops.nanany, axis, bool_only, skipna, level, **kwargs + "any", nanops.nanany, axis, bool_only, skipna, **kwargs ) def all( @@ -10867,11 +10833,10 @@ def all( axis: Axis = 0, bool_only: bool_t = False, skipna: bool_t = True, - level: Level | None = None, **kwargs, ) -> Series | bool_t: return self._logical_func( - "all", nanops.nanall, axis, bool_only, skipna, level, **kwargs + "all", nanops.nanall, axis, bool_only, skipna, **kwargs ) @final @@ -10930,7 +10895,6 @@ def _stat_function_ddof( func, axis: Axis | None = None, skipna: bool_t = True, - level: Level | None = None, ddof: int = 1, numeric_only: bool_t = False, **kwargs, @@ -10939,17 +10903,7 @@ def _stat_function_ddof( validate_bool_kwarg(skipna, "skipna", none_allowed=False) if axis is None: axis = self._stat_axis_number - if level is not None: - warnings.warn( - "Using the level keyword in DataFrame and Series aggregations is " - 
"deprecated and will be removed in a future version. Use groupby " - "instead. df.var(level=1) should use df.groupby(level=1).var().", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._agg_by_level( - name, axis=axis, level=level, skipna=skipna, ddof=ddof - ) + return self._reduce( func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof ) @@ -10958,39 +10912,36 @@ def sem( self, axis: Axis | None = None, skipna: bool_t = True, - level: Level | None = None, ddof: int = 1, numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function_ddof( - "sem", nanops.nansem, axis, skipna, level, ddof, numeric_only, **kwargs + "sem", nanops.nansem, axis, skipna, ddof, numeric_only, **kwargs ) def var( self, axis: Axis | None = None, skipna: bool_t = True, - level: Level | None = None, ddof: int = 1, numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function_ddof( - "var", nanops.nanvar, axis, skipna, level, ddof, numeric_only, **kwargs + "var", nanops.nanvar, axis, skipna, ddof, numeric_only, **kwargs ) def std( self, axis: Axis | None = None, skipna: bool_t = True, - level: Level | None = None, ddof: int = 1, numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function_ddof( - "std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs + "std", nanops.nanstd, axis, skipna, ddof, numeric_only, **kwargs ) @final @@ -11000,7 +10951,6 @@ def _stat_function( func, axis: Axis | None | lib.NoDefault = None, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, **kwargs, ): @@ -11011,7 +10961,7 @@ def _stat_function( validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if axis is None and level is None and self.ndim > 1: + if axis is None and self.ndim > 1: # user must have explicitly passed axis=None # GH#21597 warnings.warn( @@ -11026,17 +10976,7 @@ def _stat_function( if axis is None: axis = self._stat_axis_number - if 
level is not None: - warnings.warn( - "Using the level keyword in DataFrame and Series aggregations is " - "deprecated and will be removed in a future version. Use groupby " - "instead. df.median(level=1) should use df.groupby(level=1).median().", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._agg_by_level( - name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only - ) + return self._reduce( func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only ) @@ -11045,7 +10985,6 @@ def min( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, **kwargs, ): @@ -11054,7 +10993,6 @@ def min( nanops.nanmin, axis, skipna, - level, numeric_only, **kwargs, ) @@ -11063,7 +11001,6 @@ def max( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, **kwargs, ): @@ -11072,7 +11009,6 @@ def max( nanops.nanmax, axis, skipna, - level, numeric_only, **kwargs, ) @@ -11081,48 +11017,44 @@ def mean( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function( - "mean", nanops.nanmean, axis, skipna, level, numeric_only, **kwargs + "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs ) def median( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function( - "median", nanops.nanmedian, axis, skipna, level, numeric_only, **kwargs + "median", nanops.nanmedian, axis, skipna, numeric_only, **kwargs ) def skew( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, **kwargs, ) -> Series | float: return 
self._stat_function( - "skew", nanops.nanskew, axis, skipna, level, numeric_only, **kwargs + "skew", nanops.nanskew, axis, skipna, numeric_only, **kwargs ) def kurt( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, **kwargs, ) -> Series | float: return self._stat_function( - "kurt", nanops.nankurt, axis, skipna, level, numeric_only, **kwargs + "kurt", nanops.nankurt, axis, skipna, numeric_only, **kwargs ) kurtosis = kurt @@ -11134,7 +11066,6 @@ def _min_count_stat_function( func, axis: Axis | None = None, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, min_count: int = 0, **kwargs, @@ -11150,22 +11081,6 @@ def _min_count_stat_function( if axis is None: axis = self._stat_axis_number - if level is not None: - warnings.warn( - "Using the level keyword in DataFrame and Series aggregations is " - "deprecated and will be removed in a future version. Use groupby " - "instead. 
df.sum(level=1) should use df.groupby(level=1).sum().", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._agg_by_level( - name, - axis=axis, - level=level, - skipna=skipna, - min_count=min_count, - numeric_only=numeric_only, - ) return self._reduce( func, @@ -11180,20 +11095,18 @@ def sum( self, axis: Axis | None = None, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): return self._min_count_stat_function( - "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs + "sum", nanops.nansum, axis, skipna, numeric_only, min_count, **kwargs ) def prod( self, axis: Axis | None = None, skipna: bool_t = True, - level: Level | None = None, numeric_only: bool_t = False, min_count: int = 0, **kwargs, @@ -11203,7 +11116,6 @@ def prod( nanops.nanprod, axis, skipna, - level, numeric_only, min_count, **kwargs, @@ -11234,7 +11146,6 @@ def any( axis: Axis = 0, bool_only=None, skipna: bool_t = True, - level=None, **kwargs, ): return NDFrame.any( @@ -11242,7 +11153,6 @@ def any( axis=axis, bool_only=bool_only, skipna=skipna, - level=level, **kwargs, ) @@ -11263,10 +11173,9 @@ def all( axis: Axis = 0, bool_only=None, skipna: bool_t = True, - level=None, **kwargs, ): - return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) + return NDFrame.all(self, axis, bool_only, skipna, **kwargs) setattr(cls, "all", all) @@ -11285,12 +11194,11 @@ def sem( self, axis: Axis | None = None, skipna: bool_t = True, - level=None, ddof: int = 1, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) + return NDFrame.sem(self, axis, skipna, ddof, numeric_only, **kwargs) setattr(cls, "sem", sem) @@ -11308,12 +11216,11 @@ def var( self, axis: Axis | None = None, skipna: bool_t = True, - level=None, ddof: int = 1, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, 
**kwargs) + return NDFrame.var(self, axis, skipna, ddof, numeric_only, **kwargs) setattr(cls, "var", var) @@ -11332,12 +11239,11 @@ def std( self, axis: Axis | None = None, skipna: bool_t = True, - level=None, ddof: int = 1, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) + return NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs) setattr(cls, "std", std) @@ -11421,14 +11327,11 @@ def sum( self, axis: Axis | None = None, skipna: bool_t = True, - level=None, numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): - return NDFrame.sum( - self, axis, skipna, level, numeric_only, min_count, **kwargs - ) + return NDFrame.sum(self, axis, skipna, numeric_only, min_count, **kwargs) setattr(cls, "sum", sum) @@ -11446,14 +11349,11 @@ def prod( self, axis: Axis | None = None, skipna: bool_t = True, - level=None, numeric_only: bool_t = False, min_count: int = 0, **kwargs, ): - return NDFrame.prod( - self, axis, skipna, level, numeric_only, min_count, **kwargs - ) + return NDFrame.prod(self, axis, skipna, numeric_only, min_count, **kwargs) setattr(cls, "prod", prod) cls.product = prod @@ -11472,11 +11372,10 @@ def mean( self, axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level=None, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) + return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) setattr(cls, "mean", mean) @@ -11494,11 +11393,10 @@ def skew( self, axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level=None, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) + return NDFrame.skew(self, axis, skipna, numeric_only, **kwargs) setattr(cls, "skew", skew) @@ -11519,11 +11417,10 @@ def kurt( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level=None, 
numeric_only: bool_t = False, **kwargs, ): - return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) + return NDFrame.kurt(self, axis, skipna, numeric_only, **kwargs) setattr(cls, "kurt", kurt) cls.kurtosis = kurt @@ -11542,11 +11439,10 @@ def median( self, axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level=None, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) + return NDFrame.median(self, axis, skipna, numeric_only, **kwargs) setattr(cls, "median", median) @@ -11566,11 +11462,10 @@ def max( self, axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level=None, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) + return NDFrame.max(self, axis, skipna, numeric_only, **kwargs) setattr(cls, "max", max) @@ -11590,11 +11485,10 @@ def min( self, axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, - level=None, numeric_only: bool_t = False, **kwargs, ): - return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) + return NDFrame.min(self, axis, skipna, numeric_only, **kwargs) setattr(cls, "min", min) @@ -11820,12 +11714,6 @@ def _doc_params(cls): For `Series` this parameter is unused and defaults to 0. skipna : bool, default True Exclude NA/null values when computing the result. -level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a {name1}. - - .. deprecated:: 1.3.0 - The level keyword is deprecated. Use groupby instead. numeric_only : bool, default False Include only float, int, boolean columns. Not implemented for Series. @@ -11850,12 +11738,6 @@ def _doc_params(cls): skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. 
-level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a {name1}. - - .. deprecated:: 1.3.0 - The level keyword is deprecated. Use groupby instead. ddof : int, default 1 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, where N represents the number of elements. @@ -11953,12 +11835,6 @@ def _doc_params(cls): True, then the result will be {empty_value}, as for an empty row/column. If skipna is False, then NA are treated as True, because these are not equal to zero. -level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a {name1}. - - .. deprecated:: 1.3.0 - The level keyword is deprecated. Use groupby instead. **kwargs : any, default None Additional keywords have no effect but might be accepted for compatibility with NumPy. diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ec9c8564ab549..571559dc838f5 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -40,7 +40,6 @@ CorrelationMethod, FillnaOptions, IndexLabel, - Level, Manager, Manager2D, SingleManager, @@ -864,7 +863,6 @@ def skew( self, axis: Axis | lib.NoDefault = lib.no_default, skipna: bool = True, - level: Level | None = None, numeric_only: bool | None = None, **kwargs, ) -> Series: @@ -872,7 +870,6 @@ def skew( "skew", axis=axis, skipna=skipna, - level=level, numeric_only=numeric_only, **kwargs, ) @@ -2242,7 +2239,6 @@ def skew( self, axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool = True, - level: Level | None = None, numeric_only: bool | lib.NoDefault = lib.no_default, **kwargs, ) -> DataFrame: @@ -2250,7 +2246,6 @@ def skew( "skew", axis=axis, skipna=skipna, - level=level, numeric_only=numeric_only, **kwargs, ) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2664988a7b8d4..44732b9060ff9 100644 --- a/pandas/core/series.py +++ 
b/pandas/core/series.py @@ -1992,16 +1992,10 @@ def groupby( # Statistics, overridden ndarray methods # TODO: integrate bottleneck - def count(self, level: Level = None): + def count(self): """ Return number of non-NA/null observations in the Series. - Parameters - ---------- - level : int or level name, default None - If the axis is a MultiIndex (hierarchical), count along a - particular level, collapsing into a smaller Series. - Returns ------- - int or Series (if level specified) + int @@ -2017,40 +2011,7 @@ def count(self, level: Level = None): >>> s.count() 2 """ - if level is None: - return notna(self._values).sum().astype("int64") - else: - warnings.warn( - "Using the level keyword in DataFrame and Series aggregations is " - "deprecated and will be removed in a future version. Use groupby " - "instead. ser.count(level=1) should use ser.groupby(level=1).count().", - FutureWarning, - stacklevel=find_stack_level(), - ) - if not isinstance(self.index, MultiIndex): - raise ValueError("Series.count level is only valid with a MultiIndex") - - index = self.index - assert isinstance(index, MultiIndex) # for mypy - - if isinstance(level, str): - level = index._get_level_number(level) - - lev = index.levels[level] - level_codes = np.array(index.codes[level], subok=False, copy=True) - - mask = level_codes == -1 - if mask.any(): - level_codes[mask] = cnt = len(lev) - lev = lev.insert(cnt, lev._na_value) - - obs = level_codes[notna(self._values)] - # error: Argument "minlength" to "bincount" has incompatible type - # "Optional[int]"; expected "SupportsIndex" - out = np.bincount(obs, minlength=len(lev) or None) # type: ignore[arg-type] - return self._constructor(out, index=lev, dtype="int64").__finalize__( - self, method="count" - ) + return notna(self._values).sum().astype("int64") def mode(self, dropna: bool = True) -> Series: """ diff --git a/pandas/tests/frame/methods/test_count_with_level_deprecated.py b/pandas/tests/frame/methods/test_count_with_level_deprecated.py deleted 
file mode 100644 index f6fbc281c7a8e..0000000000000 --- a/pandas/tests/frame/methods/test_count_with_level_deprecated.py +++ /dev/null @@ -1,123 +0,0 @@ -import numpy as np -import pytest - -from pandas import ( - DataFrame, - Index, - Series, -) -import pandas._testing as tm - - -class TestDataFrameCount: - def test_count_multiindex(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - frame = frame.copy() - frame.index.names = ["a", "b"] - - with tm.assert_produces_warning(FutureWarning): - result = frame.count(level="b") - with tm.assert_produces_warning(FutureWarning): - expected = frame.count(level=1) - tm.assert_frame_equal(result, expected, check_names=False) - - with tm.assert_produces_warning(FutureWarning): - result = frame.count(level="a") - with tm.assert_produces_warning(FutureWarning): - expected = frame.count(level=0) - tm.assert_frame_equal(result, expected, check_names=False) - - msg = "Level x not found" - with pytest.raises(KeyError, match=msg): - with tm.assert_produces_warning(FutureWarning): - frame.count(level="x") - - def test_count_level_corner(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - - ser = frame["A"][:0] - with tm.assert_produces_warning(FutureWarning): - result = ser.count(level=0) - expected = Series(0, index=ser.index.levels[0], name="A") - tm.assert_series_equal(result, expected) - - df = frame[:0] - with tm.assert_produces_warning(FutureWarning): - result = df.count(level=0) - expected = ( - DataFrame( - index=ser.index.levels[0].set_names(["first"]), columns=df.columns - ) - .fillna(0) - .astype(np.int64) - ) - tm.assert_frame_equal(result, expected) - - def test_count_index_with_nan(self): - # https://github.com/pandas-dev/pandas/issues/21824 - df = DataFrame( - { - "Person": ["John", "Myla", None, "John", "Myla"], - "Age": [24.0, 5, 21.0, 33, 26], - "Single": [False, True, True, True, False], - } - ) - - # count on row labels - with 
tm.assert_produces_warning(FutureWarning): - res = df.set_index(["Person", "Single"]).count(level="Person") - expected = DataFrame( - index=Index(["John", "Myla"], name="Person"), - columns=Index(["Age"]), - data=[2, 2], - ) - tm.assert_frame_equal(res, expected) - - # count on column labels - with tm.assert_produces_warning(FutureWarning): - res = df.set_index(["Person", "Single"]).T.count(level="Person", axis=1) - expected = DataFrame( - columns=Index(["John", "Myla"], name="Person"), - index=Index(["Age"]), - data=[[2, 2]], - ) - tm.assert_frame_equal(res, expected) - - def test_count_level( - self, - multiindex_year_month_day_dataframe_random_data, - multiindex_dataframe_random_data, - ): - ymd = multiindex_year_month_day_dataframe_random_data - frame = multiindex_dataframe_random_data - - def _check_counts(frame, axis=0): - index = frame._get_axis(axis) - for i in range(index.nlevels): - with tm.assert_produces_warning(FutureWarning): - result = frame.count(axis=axis, level=i) - expected = frame.groupby(axis=axis, level=i).count() - expected = expected.reindex_like(result).astype("i8") - tm.assert_frame_equal(result, expected) - - frame.iloc[1, [1, 2]] = np.nan - frame.iloc[7, [0, 1]] = np.nan - ymd.iloc[1, [1, 2]] = np.nan - ymd.iloc[7, [0, 1]] = np.nan - - _check_counts(frame) - _check_counts(ymd) - _check_counts(frame.T, axis=1) - _check_counts(ymd.T, axis=1) - - # can't call with level on regular DataFrame - df = tm.makeTimeDataFrame() - with pytest.raises(TypeError, match="hierarchical"): - with tm.assert_produces_warning(FutureWarning): - df.count(level=0) - - frame["D"] = "foo" - with tm.assert_produces_warning(FutureWarning): - result = frame.count(level=0, numeric_only=True) - tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 0e5c6057b9a61..b7474060a7e8a 100644 --- a/pandas/tests/frame/test_reductions.py +++ 
b/pandas/tests/frame/test_reductions.py @@ -18,7 +18,6 @@ Categorical, DataFrame, Index, - MultiIndex, Series, Timestamp, date_range, @@ -493,21 +492,6 @@ def test_sem(self, datetime_frame): result = nanops.nansem(arr, axis=0) assert not (result < 0).any() - @td.skip_if_no_scipy - def test_kurt(self): - index = MultiIndex( - levels=[["bar"], ["one", "two", "three"], [0, 1]], - codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - ) - df = DataFrame(np.random.randn(6, 3), index=index) - - kurt = df.kurt() - with tm.assert_produces_warning(FutureWarning): - kurt2 = df.kurt(level=0).xs("bar") - tm.assert_series_equal(kurt, kurt2, check_names=False) - assert kurt.name is None - assert kurt2.name == "bar" - @pytest.mark.parametrize( "dropna, expected", [ @@ -1316,19 +1300,6 @@ def test_any_all_object_bool_only(self): assert df.any(bool_only=True, axis=None) - @pytest.mark.parametrize("method", ["any", "all"]) - def test_any_all_level_axis_none_raises(self, method): - df = DataFrame( - {"A": 1}, - index=MultiIndex.from_product( - [["A", "B"], ["a", "b"]], names=["out", "in"] - ), - ) - xpr = "Must specify 'axis' when aggregating by level." 
- with pytest.raises(ValueError, match=xpr): - with tm.assert_produces_warning(FutureWarning): - getattr(df, method)(axis=None, level="out") - # --------------------------------------------------------------------- # Unsorted @@ -1440,25 +1411,6 @@ def test_preserve_timezone(self, initial: str, method): result = getattr(df, method)(axis=1) tm.assert_series_equal(result, expected) - def test_frame_any_all_with_level(self): - df = DataFrame( - {"data": [False, False, True, False, True, False, True]}, - index=[ - ["one", "one", "two", "one", "two", "two", "two"], - [0, 1, 0, 2, 1, 2, 3], - ], - ) - - with tm.assert_produces_warning(FutureWarning, match="Using the level"): - result = df.any(level=0) - ex = DataFrame({"data": [False, True]}, index=["one", "two"]) - tm.assert_frame_equal(result, ex) - - with tm.assert_produces_warning(FutureWarning, match="Using the level"): - result = df.all(level=0) - ex = DataFrame({"data": [False, False]}, index=["one", "two"]) - tm.assert_frame_equal(result, ex) - def test_frame_any_with_timedelta(self): # GH#17667 df = DataFrame( @@ -1476,16 +1428,6 @@ def test_frame_any_with_timedelta(self): expected = Series(data=[False, True]) tm.assert_series_equal(result, expected) - def test_reductions_deprecation_level_argument( - self, frame_or_series, reduction_functions - ): - # GH#39983 - obj = frame_or_series( - [1, 2, 3], index=MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]]) - ) - with tm.assert_produces_warning(FutureWarning, match="level"): - getattr(obj, reduction_functions)(level=0) - def test_reductions_skipna_none_raises( self, request, frame_or_series, reduction_functions ): @@ -1638,22 +1580,6 @@ def test_minmax_extensionarray(method, numeric_only): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("meth", ["max", "min", "sum", "mean", "median"]) -def test_groupby_regular_arithmetic_equivalent(meth): - # GH#40660 - df = DataFrame( - {"a": [pd.Timedelta(hours=6), pd.Timedelta(hours=7)], "b": [12.1, 13.3]} - ) - 
expected = df.copy() - - with tm.assert_produces_warning(FutureWarning): - result = getattr(df, meth)(level=0) - tm.assert_frame_equal(result, expected) - - result = getattr(df.groupby(level=0), meth)(numeric_only=False) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("ts_value", [Timestamp("2000-01-01"), pd.NaT]) def test_frame_mixed_numeric_object_with_timestamp(ts_value): # GH 13912 diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index b385091c9ff51..a06304af7a2d0 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -610,9 +610,8 @@ def test_subclassed_count(self): list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] ), ) - with tm.assert_produces_warning(FutureWarning): - result = df.count(level=1) - assert isinstance(result, tm.SubclassedDataFrame) + result = df.count() + assert isinstance(result, tm.SubclassedSeries) df = tm.SubclassedDataFrame() result = df.count() diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 6bd9b8af766c3..a7551af68bc2b 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -190,12 +190,6 @@ pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("count")), ), - pytest.param( - (pd.DataFrame, frame_mi_data, operator.methodcaller("count", level="A")), - marks=[ - pytest.mark.filterwarnings("ignore:Using the level keyword:FutureWarning"), - ], - ), pytest.param( (pd.DataFrame, frame_data, operator.methodcaller("nunique")), ), diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 034514cb0bcfb..f7af4892635bb 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -70,13 +70,11 @@ def raw_frame(multiindex_dataframe_random_data): @pytest.mark.parametrize("op", AGG_FUNCTIONS) -@pytest.mark.parametrize("level", [0, 1]) @pytest.mark.parametrize("axis", 
[0, 1]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("sort", [True, False]) -@pytest.mark.filterwarnings("ignore:Using the level keyword:FutureWarning") @pytest.mark.filterwarnings("ignore:The default value of numeric_only:FutureWarning") -def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): +def test_regression_allowlist_methods(raw_frame, op, axis, skipna, sort): # GH6944 # GH 17537 # explicitly test the allowlist methods @@ -86,19 +84,14 @@ def test_regression_allowlist_methods(raw_frame, op, level, axis, skipna, sort): frame = raw_frame.T if op in AGG_FUNCTIONS_WITH_SKIPNA: - grouped = frame.groupby(level=level, axis=axis, sort=sort) + grouped = frame.groupby("first", axis=axis, sort=sort) result = getattr(grouped, op)(skipna=skipna) - expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) - if sort: - expected = expected.sort_index(axis=axis, level=level) - tm.assert_frame_equal(result, expected) else: - grouped = frame.groupby(level=level, axis=axis, sort=sort) + grouped = frame.groupby("first", axis=axis, sort=sort) result = getattr(grouped, op)() - expected = getattr(frame, op)(level=level, axis=axis) - if sort: - expected = expected.sort_index(axis=axis, level=level) - tm.assert_frame_equal(result, expected) + # Previously compared to frame.op(level=...), but level removed in 2.0 + # TODO(GH 49629): Assert something better + assert isinstance(result, DataFrame) def test_groupby_blocklist(df_letters): diff --git a/pandas/tests/groupby/test_api_consistency.py b/pandas/tests/groupby/test_api_consistency.py index 155f86c23e106..bd29f29719494 100644 --- a/pandas/tests/groupby/test_api_consistency.py +++ b/pandas/tests/groupby/test_api_consistency.py @@ -37,19 +37,19 @@ def test_frame_consistency(request, groupby_func): # Some of these may be purposeful inconsistencies between the APIs exclude_expected, exclude_result = set(), set() if groupby_func in ("any", "all"): - exclude_expected = 
{"kwargs", "bool_only", "level", "axis"} + exclude_expected = {"kwargs", "bool_only", "axis"} elif groupby_func in ("count",): - exclude_expected = {"numeric_only", "level", "axis"} + exclude_expected = {"numeric_only", "axis"} elif groupby_func in ("nunique",): exclude_expected = {"axis"} elif groupby_func in ("max", "min"): - exclude_expected = {"axis", "kwargs", "level", "skipna"} + exclude_expected = {"axis", "kwargs", "skipna"} exclude_result = {"min_count", "engine", "engine_kwargs"} elif groupby_func in ("mean", "std", "sum", "var"): - exclude_expected = {"axis", "kwargs", "level", "skipna"} + exclude_expected = {"axis", "kwargs", "skipna"} exclude_result = {"engine", "engine_kwargs"} elif groupby_func in ("median", "prod", "sem"): - exclude_expected = {"axis", "kwargs", "level", "skipna"} + exclude_expected = {"axis", "kwargs", "skipna"} elif groupby_func in ("backfill", "bfill", "ffill", "pad"): exclude_expected = {"downcast", "inplace", "axis"} elif groupby_func in ("cummax", "cummin"): @@ -95,19 +95,17 @@ def test_series_consistency(request, groupby_func): # Some of these may be purposeful inconsistencies between the APIs exclude_expected, exclude_result = set(), set() if groupby_func in ("any", "all"): - exclude_expected = {"kwargs", "bool_only", "level", "axis"} - elif groupby_func in ("count",): - exclude_expected = {"level"} + exclude_expected = {"kwargs", "bool_only", "axis"} elif groupby_func in ("diff",): exclude_result = {"axis"} elif groupby_func in ("max", "min"): - exclude_expected = {"axis", "kwargs", "level", "skipna"} + exclude_expected = {"axis", "kwargs", "skipna"} exclude_result = {"min_count", "engine", "engine_kwargs"} elif groupby_func in ("mean", "std", "sum", "var"): - exclude_expected = {"axis", "kwargs", "level", "skipna"} + exclude_expected = {"axis", "kwargs", "skipna"} exclude_result = {"engine", "engine_kwargs"} elif groupby_func in ("median", "prod", "sem"): - exclude_expected = {"axis", "kwargs", "level", "skipna"} + 
exclude_expected = {"axis", "kwargs", "skipna"} elif groupby_func in ("backfill", "bfill", "ffill", "pad"): exclude_expected = {"downcast", "inplace", "axis"} elif groupby_func in ("cummax", "cummin"): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 392910bd9e598..7fd52d3cf5bb8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1095,10 +1095,6 @@ def test_groupby_complex(): result = a.groupby(level=0).sum() tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning): - result = a.sum(level=0) - tm.assert_series_equal(result, expected) - def test_groupby_complex_numbers(): # GH 17927 diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 8888e2687621d..fb4cba5ea40b3 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -672,30 +672,6 @@ def test_empty(self, method, unit, use_bottleneck, dtype): result = getattr(s, method)(min_count=2) assert isna(result) - @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)]) - def test_empty_multi(self, method, unit): - s = Series( - [1, np.nan, np.nan, np.nan], - index=pd.MultiIndex.from_product([("a", "b"), (0, 1)]), - ) - # 1 / 0 by default - with tm.assert_produces_warning(FutureWarning): - result = getattr(s, method)(level=0) - expected = Series([1, unit], index=["a", "b"]) - tm.assert_series_equal(result, expected) - - # min_count=0 - with tm.assert_produces_warning(FutureWarning): - result = getattr(s, method)(level=0, min_count=0) - expected = Series([1, unit], index=["a", "b"]) - tm.assert_series_equal(result, expected) - - # min_count=1 - with tm.assert_produces_warning(FutureWarning): - result = getattr(s, method)(level=0, min_count=1) - expected = Series([1, np.nan], index=["a", "b"]) - tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("method", ["mean", 
"var"]) @pytest.mark.parametrize("dtype", ["Float64", "Int64", "boolean"]) def test_ops_consistency_on_empty_nullable(self, method, dtype): @@ -932,7 +908,7 @@ def test_numpy_all_any(self, index_or_series): idx = Index([1, 2, 3]) assert np.all(idx) - def test_all_any_params(self): + def test_all_any_skipna(self): # Check skipna, with implicit 'object' dtype. s1 = Series([np.nan, True]) s2 = Series([np.nan, False]) @@ -941,20 +917,8 @@ def test_all_any_params(self): assert s2.any(skipna=False) assert not s2.any(skipna=True) - # Check level. + def test_all_any_bool_only(self): s = Series([False, False, True, True, False, True], index=[0, 0, 1, 1, 2, 2]) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s.all(level=0), Series([False, True, False])) - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s.any(level=0), Series([False, True, True])) - - msg = "Option bool_only is not implemented with option level" - with pytest.raises(NotImplementedError, match=msg): - with tm.assert_produces_warning(FutureWarning): - s.any(bool_only=True, level=0) - with pytest.raises(NotImplementedError, match=msg): - with tm.assert_produces_warning(FutureWarning): - s.all(bool_only=True, level=0) # GH#47500 - test bool_only works assert s.any(bool_only=True) @@ -1009,22 +973,6 @@ def test_any_all_nullable_kleene_logic( result = getattr(ser, bool_agg_func)(skipna=skipna) assert (result is pd.NA and expected is pd.NA) or result == expected - @pytest.mark.parametrize( - "bool_agg_func,expected", - [("all", [False, True, False]), ("any", [False, True, True])], - ) - def test_any_all_boolean_level(self, bool_agg_func, expected): - # GH#33449 - ser = Series( - [False, False, True, True, False, True], - index=[0, 0, 1, 1, 2, 2], - dtype="boolean", - ) - with tm.assert_produces_warning(FutureWarning): - result = getattr(ser, bool_agg_func)(level=0) - expected = Series(expected, dtype="boolean") - tm.assert_series_equal(result, expected) - def 
test_any_axis1_bool_only(self): # GH#32432 df = DataFrame({"A": [True, False], "B": [1, 2]}) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index be40d7ca631eb..0dc68d78eebc9 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -256,14 +256,7 @@ def test_kurt(self): alt = lambda x: kurtosis(x, bias=False) self._check_stat_op("kurt", alt, string_series) - index = pd.MultiIndex( - levels=[["bar"], ["one", "two", "three"], [0, 1]], - codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - ) - s = Series(np.random.randn(6), index=index) - with tm.assert_produces_warning(FutureWarning): - tm.assert_almost_equal(s.kurt(), s.kurt(level=0)["bar"]) - + def test_kurt_corner(self): # test corner cases, kurt() returns NaN unless there's at least 4 # values min_N = 4 diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py index 29fb6aa32bc7c..dfc531f63614f 100644 --- a/pandas/tests/series/methods/test_count.py +++ b/pandas/tests/series/methods/test_count.py @@ -1,69 +1,13 @@ import numpy as np -import pytest import pandas as pd from pandas import ( Categorical, - MultiIndex, Series, ) -import pandas._testing as tm class TestSeriesCount: - def test_count_level_series(self): - index = MultiIndex( - levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]], - codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]], - ) - - ser = Series(np.random.randn(len(index)), index=index) - - with tm.assert_produces_warning(FutureWarning): - result = ser.count(level=0) - expected = ser.groupby(level=0).count() - tm.assert_series_equal( - result.astype("f8"), expected.reindex(result.index).fillna(0) - ) - - with tm.assert_produces_warning(FutureWarning): - result = ser.count(level=1) - expected = ser.groupby(level=1).count() - tm.assert_series_equal( - result.astype("f8"), expected.reindex(result.index).fillna(0) - ) - 
- def test_count_multiindex(self, series_with_multilevel_index): - ser = series_with_multilevel_index - - series = ser.copy() - series.index.names = ["a", "b"] - - with tm.assert_produces_warning(FutureWarning): - result = series.count(level="b") - with tm.assert_produces_warning(FutureWarning): - expect = ser.count(level=1).rename_axis("b") - tm.assert_series_equal(result, expect) - - with tm.assert_produces_warning(FutureWarning): - result = series.count(level="a") - with tm.assert_produces_warning(FutureWarning): - expect = ser.count(level=0).rename_axis("a") - tm.assert_series_equal(result, expect) - - msg = "Level x not found" - with pytest.raises(KeyError, match=msg): - with tm.assert_produces_warning(FutureWarning): - series.count("x") - - def test_count_level_without_multiindex(self): - ser = Series(range(3)) - - msg = "Series.count level is only valid with a MultiIndex" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning): - ser.count(level=1) - def test_count(self, datetime_series): assert datetime_series.count() == len(datetime_series) @@ -71,18 +15,6 @@ def test_count(self, datetime_series): assert datetime_series.count() == np.isfinite(datetime_series).sum() - mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) - ts = Series(np.arange(len(mi)), index=mi) - - with tm.assert_produces_warning(FutureWarning): - left = ts.count(level=1) - right = Series([2, 3, 1], index=[1, 2, np.nan]) - tm.assert_series_equal(left, right) - - ts.iloc[[0, 3, 5]] = np.nan - with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(ts.count(level=1), right - 1) - # GH#29478 with pd.option_context("use_inf_as_na", True): assert Series([pd.Timestamp("1990/1/1")]).count() == 1 diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index a552d9d84329f..e9d2877148c2b 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ 
-2,10 +2,7 @@ import pytest import pandas as pd -from pandas import ( - MultiIndex, - Series, -) +from pandas import Series import pandas._testing as tm @@ -83,15 +80,6 @@ def test_prod_numpy16_bug(): assert not isinstance(result, Series) -def test_sum_with_level(): - obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) - - with tm.assert_produces_warning(FutureWarning): - result = obj.sum(level=0) - expected = Series([10.0], index=[2]) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("func", [np.any, np.all]) @pytest.mark.parametrize("kwargs", [{"keepdims": True}, {"out": object()}]) def test_validate_any_all_out_keepdims_raises(kwargs, func): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 7e428821a2d50..023411f486c6a 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -15,8 +15,7 @@ def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): # axis=0 ymd = multiindex_year_month_day_dataframe_random_data - with tm.assert_produces_warning(FutureWarning): - month_sums = ymd.sum(level="month") + month_sums = ymd.groupby("month").sum() result = month_sums.reindex(ymd.index, level=1) expected = ymd.groupby(level="month").transform(np.sum) @@ -28,35 +27,11 @@ def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): tm.assert_series_equal(result, expected, check_names=False) # axis=1 - with tm.assert_produces_warning(FutureWarning): - month_sums = ymd.T.sum(axis=1, level="month") + month_sums = ymd.T.groupby("month", axis=1).sum() result = month_sums.reindex(columns=ymd.index, level=1) expected = ymd.groupby(level="month").transform(np.sum).T tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("opname", ["sub", "add", "mul", "div"]) - def test_binops_level( - self, opname, multiindex_year_month_day_dataframe_random_data - ): - ymd = multiindex_year_month_day_dataframe_random_data - - op = getattr(DataFrame, opname) 
- with tm.assert_produces_warning(FutureWarning): - month_sums = ymd.sum(level="month") - result = op(ymd, month_sums, level="month") - - broadcasted = ymd.groupby(level="month").transform(np.sum) - expected = op(ymd, broadcasted) - tm.assert_frame_equal(result, expected) - - # Series - op = getattr(Series, opname) - result = op(ymd["A"], month_sums["A"], level="month") - broadcasted = ymd["A"].groupby(level="month").transform(np.sum) - expected = op(ymd["A"], broadcasted) - expected.name = "A" - tm.assert_series_equal(result, expected) - def test_reindex(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data @@ -155,106 +130,6 @@ def test_alignment(self): exp = x.reindex(exp_index) - y.reindex(exp_index) tm.assert_series_equal(res, exp) - @pytest.mark.parametrize("level", [0, 1]) - @pytest.mark.parametrize("skipna", [True, False]) - @pytest.mark.parametrize("sort", [True, False]) - def test_series_group_min_max( - self, all_numeric_reductions, level, skipna, sort, series_with_multilevel_index - ): - # GH 17537 - ser = series_with_multilevel_index - op = all_numeric_reductions - - grouped = ser.groupby(level=level, sort=sort) - # skipna=True - leftside = grouped.agg(lambda x: getattr(x, op)(skipna=skipna)) - with tm.assert_produces_warning(FutureWarning): - rightside = getattr(ser, op)(level=level, skipna=skipna) - if sort: - rightside = rightside.sort_index(level=level) - tm.assert_series_equal(leftside, rightside) - - @pytest.mark.parametrize("level", [0, 1]) - @pytest.mark.parametrize("axis", [0, 1]) - @pytest.mark.parametrize("skipna", [True, False]) - @pytest.mark.parametrize("sort", [True, False]) - def test_frame_group_ops( - self, - all_numeric_reductions, - level, - axis, - skipna, - sort, - multiindex_dataframe_random_data, - ): - # GH 17537 - frame = multiindex_dataframe_random_data - - frame.iloc[1, [1, 2]] = np.nan - frame.iloc[7, [0, 1]] = np.nan - - level_name = frame.index.names[level] - - if axis == 0: - frame = frame - 
else: - frame = frame.T - - grouped = frame.groupby(level=level, axis=axis, sort=sort) - - pieces = [] - op = all_numeric_reductions - - def aggf(x): - pieces.append(x) - return getattr(x, op)(skipna=skipna, axis=axis) - - leftside = grouped.agg(aggf) - with tm.assert_produces_warning(FutureWarning): - rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) - if sort: - rightside = rightside.sort_index(level=level, axis=axis) - frame = frame.sort_index(level=level, axis=axis) - - # for good measure, groupby detail - level_index = frame._get_axis(axis).levels[level].rename(level_name) - - tm.assert_index_equal(leftside._get_axis(axis), level_index) - tm.assert_index_equal(rightside._get_axis(axis), level_index) - - tm.assert_frame_equal(leftside, rightside) - - @pytest.mark.parametrize("meth", ["var", "std"]) - def test_std_var_pass_ddof(self, meth): - index = MultiIndex.from_arrays( - [np.arange(5).repeat(10), np.tile(np.arange(10), 5)] - ) - df = DataFrame(np.random.randn(len(index), 5), index=index) - - ddof = 4 - alt = lambda x: getattr(x, meth)(ddof=ddof) - - with tm.assert_produces_warning(FutureWarning): - result = getattr(df[0], meth)(level=0, ddof=ddof) - expected = df[0].groupby(level=0).agg(alt) - tm.assert_series_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning): - result = getattr(df, meth)(level=0, ddof=ddof) - expected = df.groupby(level=0).agg(alt) - tm.assert_frame_equal(result, expected) - - def test_agg_multiple_levels( - self, multiindex_year_month_day_dataframe_random_data, frame_or_series - ): - ymd = multiindex_year_month_day_dataframe_random_data - ymd = tm.get_obj(ymd, frame_or_series) - - with tm.assert_produces_warning(FutureWarning): - result = ymd.sum(level=["year", "month"]) - expected = ymd.groupby(level=["year", "month"]).sum() - tm.assert_equal(result, expected) - def test_groupby_multilevel(self, multiindex_year_month_day_dataframe_random_data): ymd = 
multiindex_year_month_day_dataframe_random_data