Skip to content

DEPR: Remove df.reduction(level) #49611

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,10 +454,10 @@ def setup(self, axis):
)

def time_count_level_multi(self, axis):
self.df.count(axis=axis, level=1)
self.df.count(axis=axis)

def time_count_level_mixed_dtypes_multi(self, axis):
self.df_mixed.count(axis=axis, level=1)
self.df_mixed.count(axis=axis)


class Apply:
Expand Down
20 changes: 10 additions & 10 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def time_op(self, op, dtype, axis):

class FrameMultiIndexOps:

params = ([0, 1, [0, 1]], ops)
param_names = ["level", "op"]
params = [ops]
param_names = ["op"]

def setup(self, level, op):
def setup(self, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
codes = [
np.arange(10).repeat(10000),
Expand All @@ -37,8 +37,8 @@ def setup(self, level, op):
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
self.df_func = getattr(df, op)

def time_op(self, level, op):
self.df_func(level=level)
def time_op(self, op):
self.df_func()


class SeriesOps:
Expand All @@ -56,10 +56,10 @@ def time_op(self, op, dtype):

class SeriesMultiIndexOps:

params = ([0, 1, [0, 1]], ops)
param_names = ["level", "op"]
params = [ops]
param_names = ["op"]

def setup(self, level, op):
def setup(self, op):
levels = [np.arange(10), np.arange(100), np.arange(100)]
codes = [
np.arange(10).repeat(10000),
Expand All @@ -70,8 +70,8 @@ def setup(self, level, op):
s = pd.Series(np.random.randn(len(index)), index=index)
self.s_func = getattr(s, op)

def time_op(self, level, op):
self.s_func(level=level)
def time_op(self, op):
self.s_func()


class Rank:
Expand Down
10 changes: 6 additions & 4 deletions doc/source/whatsnew/v0.15.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,13 @@ Other enhancements:

- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`):

.. ipython:: python
:okwarning:
.. code-block:: python

s = pd.Series([False, True, False], index=[0, 0, 1])
s.any(level=0)
>>> s = pd.Series([False, True, False], index=[0, 0, 1])
>>> s.any(level=0)
0 True
1 False
dtype: bool
Comment on lines +157 to +163
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been changing these to ``.. code-block:: ipython`` instead. I don't have any preference one way or the other, but I wonder if maybe I should have been changing them to ``python`` code blocks.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a preference either. I was mainly matching the other ``.. code-block:: python`` blocks in this "Other enhancements" section for consistency.

I think if we fully tackle freezing the whatsnew files (#6856), I would prefer the ipython version.


- ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`):

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ Removal of prior version deprecations/changes
- Removed :meth:`.Rolling.validate`, :meth:`.Expanding.validate`, and :meth:`.ExponentialMovingWindow.validate` (:issue:`43665`)
- Removed :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`)
- Removed :attr:`Rolling.is_datetimelike` (:issue:`38963`)
- Removed the ``level`` keyword in :class:`DataFrame` and :class:`Series` aggregations; use ``groupby`` instead (:issue:`39983`)
- Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`)
- Removed deprecated :attr:`NaT.freq` (:issue:`45071`)
- Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
Expand Down
64 changes: 1 addition & 63 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
ensure_platform_int,
infer_dtype_from_object,
is_1d_only_ea_dtype,
is_bool_dtype,
Expand Down Expand Up @@ -10331,7 +10330,7 @@ def c(x):
# ----------------------------------------------------------------------
# ndarray-like stats methods

def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False):
def count(self, axis: Axis = 0, numeric_only: bool = False):
"""
Count non-NA cells for each column or row.

Expand All @@ -10343,10 +10342,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
axis : {0 or 'index', 1 or 'columns'}, default 0
If 0 or 'index' counts are generated for each column.
If 1 or 'columns' counts are generated for each row.
level : int or str, optional
If the axis is a `MultiIndex` (hierarchical), count along a
particular `level`, collapsing into a `DataFrame`.
A `str` specifies the level name.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.

Expand Down Expand Up @@ -10400,16 +10395,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)
dtype: int64
"""
axis = self._get_axis_number(axis)
if level is not None:
warnings.warn(
"Using the level keyword in DataFrame and Series aggregations is "
"deprecated and will be removed in a future version. Use groupby "
"instead. df.count(level=1) should use df.groupby(level=1).count().",
FutureWarning,
stacklevel=find_stack_level(),
)
res = self._count_level(level, axis=axis, numeric_only=numeric_only)
return res.__finalize__(self, method="count")

if numeric_only:
frame = self._get_numeric_data()
Expand All @@ -10434,53 +10419,6 @@ def count(self, axis: Axis = 0, level: Level = None, numeric_only: bool = False)

return result.astype("int64").__finalize__(self, method="count")

# Count non-NA cells per group of a single MultiIndex level along `axis`.
#
# Parameters
#   level        : level of the counted axis; an int is used directly as the
#                  level number, anything else is resolved through
#                  count_axis._get_level_number(level).
#   axis         : axis number passed to _get_axis / _get_agg_axis.
#   numeric_only : when true, counting is done on self._get_numeric_data()
#                  instead of on self.
#
# Returns an object built with self._constructor from the 2-D counts, labelled
# by the selected level on one axis and by the aggregation axis on the other.
#
# Raises TypeError when the counted axis is not a MultiIndex.
def _count_level(self, level: Level, axis: AxisInt = 0, numeric_only: bool = False):
# Choose the frame to count on: numeric columns only, or everything.
if numeric_only:
frame = self._get_numeric_data()
else:
frame = self

# count_axis is the axis whose level defines the groups; agg_axis labels
# the other dimension of the result.
count_axis = frame._get_axis(axis)
agg_axis = frame._get_agg_axis(axis)

# Grouping by level only makes sense on a hierarchical index.
if not isinstance(count_axis, MultiIndex):
raise TypeError(
f"Can only count levels on hierarchical {self._get_axis_name(axis)}."
)

# Mask NaNs: Mask rows or columns where the index level is NaN, and all
# values in the DataFrame that are NaN
if frame._is_mixed_type:
# Since we have mixed types, calling notna(frame.values) might
# upcast everything to object
values_mask = notna(frame).values
else:
# But use the speedup when we have homogeneous dtypes
values_mask = notna(frame.values)

# Boolean mask over the labels of the chosen level (False where the label is NA).
index_mask = notna(count_axis.get_level_values(level=level))
if axis == 1:
# 1-D index mask is combined directly with the 2-D values mask.
mask = index_mask & values_mask
else:
# Reshape to a column vector so it combines row-wise with values_mask.
mask = index_mask.reshape(-1, 1) & values_mask

# Normalise `level` to an integer position.
if isinstance(level, int):
level_number = level
else:
level_number = count_axis._get_level_number(level)

# Labels of the result come from the level's values, renamed to the level name.
level_name = count_axis._names[level_number]
level_index = count_axis.levels[level_number]._rename(name=level_name)
# Integer codes for the level, cast via ensure_platform_int before being
# handed to lib.count_level_2d.
level_codes = ensure_platform_int(count_axis.codes[level_number])
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=axis)

# Orient the result so the level labels sit on the counted axis.
if axis == 1:
result = self._constructor(counts, index=agg_axis, columns=level_index)
else:
result = self._constructor(counts, index=level_index, columns=agg_axis)

return result

def _reduce(
self,
op,
Expand Down
Loading