Skip to content

Commit 684a4bd

Browse files
TomAugspurger authored and jorisvandenbossche committed
API/COMPAT: support axis=None for logical reduction (reduce over all axes) (#21486)
* Compat with NumPy 1.15 logical func
* Accepts axis=None as reduce all dims

(cherry picked from commit f7ed7f8)
1 parent d0f664a commit 684a4bd

File tree

8 files changed

+215
-31
lines changed

8 files changed

+215
-31
lines changed

doc/source/whatsnew/v0.23.2.txt

+30
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,36 @@ and bug fixes. We recommend that all users upgrade to this version.
1616
:local:
1717
:backlinks: none
1818

19+
.. _whatsnew_0232.enhancements:
20+
21+
Logical Reductions over Entire DataFrame
22+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23+
24+
:meth:`DataFrame.all` and :meth:`DataFrame.any` now accept ``axis=None`` to reduce over all axes to a scalar (:issue:`19976`)
25+
26+
.. ipython:: python
27+
28+
df = pd.DataFrame({"A": [1, 2], "B": [True, False]})
29+
df.all(axis=None)
30+
31+
32+
This also provides compatibility with NumPy 1.15, which now dispatches to ``DataFrame.all``.
33+
With NumPy 1.15 and pandas 0.23.1 or earlier, :func:`numpy.all` and :func:`numpy.any` will no longer reduce over every axis:
34+
35+
.. code-block:: python
36+
37+
>>> # NumPy 1.15, pandas 0.23.1
38+
>>> np.any(pd.DataFrame({"A": [False], "B": [False]}))
39+
A False
40+
B False
41+
dtype: bool
42+
43+
With pandas 0.23.2, that will correctly return False, as it did with NumPy < 1.15.
44+
45+
.. ipython:: python
46+
47+
np.any(pd.DataFrame({"A": [False], "B": [False]}))
48+
1949

2050
.. _whatsnew_0232.fixed_regressions:
2151

pandas/core/frame.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -6844,13 +6844,18 @@ def _count_level(self, level, axis=0, numeric_only=False):
68446844

68456845
def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
68466846
filter_type=None, **kwds):
6847-
axis = self._get_axis_number(axis)
6847+
if axis is None and filter_type == 'bool':
6848+
labels = None
6849+
constructor = None
6850+
else:
6851+
# TODO: Make other agg func handle axis=None properly
6852+
axis = self._get_axis_number(axis)
6853+
labels = self._get_agg_axis(axis)
6854+
constructor = self._constructor
68486855

68496856
def f(x):
68506857
return op(x, axis=axis, skipna=skipna, **kwds)
68516858

6852-
labels = self._get_agg_axis(axis)
6853-
68546859
# exclude timedelta/datetime unless we are uniform types
68556860
if axis == 1 and self._is_mixed_type and self._is_datelike_mixed_type:
68566861
numeric_only = True
@@ -6859,6 +6864,13 @@ def f(x):
68596864
try:
68606865
values = self.values
68616866
result = f(values)
6867+
6868+
if (filter_type == 'bool' and is_object_dtype(values) and
6869+
axis is None):
6870+
# work around https://github.com/numpy/numpy/issues/10489
6871+
# TODO: combine with hasattr(result, 'dtype') further down
6872+
# hard since we don't have `values` down there.
6873+
result = np.bool_(result)
68626874
except Exception as e:
68636875

68646876
# try by-column first
@@ -6925,7 +6937,9 @@ def f(x):
69256937
if axis == 0:
69266938
result = coerce_to_dtypes(result, self.dtypes)
69276939

6928-
return Series(result, index=labels)
6940+
if constructor is not None:
6941+
result = Series(result, index=labels)
6942+
return result
69296943

69306944
def nunique(self, axis=0, dropna=True):
69316945
"""

pandas/core/generic.py

+29-15
Original file line numberDiff line numberDiff line change
@@ -8729,6 +8729,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
87298729
return rs
87308730

87318731
def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
8732+
if axis is None:
8733+
raise ValueError("Must specify 'axis' when aggregating by level.")
87328734
grouped = self.groupby(level=level, axis=axis, sort=False)
87338735
if hasattr(grouped, name) and skipna:
87348736
return getattr(grouped, name)(**kwargs)
@@ -9055,8 +9057,15 @@ def _doc_parms(cls):
90559057
90569058
Parameters
90579059
----------
9058-
axis : int, default 0
9059-
Select the axis which can be 0 for indices and 1 for columns.
9060+
axis : {0 or 'index', 1 or 'columns', None}, default 0
9061+
Indicate which axis or axes should be reduced.
9062+
9063+
* 0 / 'index' : reduce the index, return a Series whose index is the
9064+
original column labels.
9065+
* 1 / 'columns' : reduce the columns, return a Series whose index is the
9066+
original index.
9067+
* None : reduce all axes, return a scalar.
9068+
90609069
skipna : boolean, default True
90619070
Exclude NA/null values. If an entire row/column is NA, the result
90629071
will be NA.
@@ -9078,9 +9087,9 @@ def _doc_parms(cls):
90789087
%(examples)s"""
90799088

90809089
_all_doc = """\
9081-
Return whether all elements are True over series or dataframe axis.
9090+
Return whether all elements are True, potentially over an axis.
90829091
9083-
Returns True if all elements within a series or along a dataframe
9092+
Returns True if all elements within a series or along a DataFrame
90849093
axis are non-zero, not-empty or not-False."""
90859094

90869095
_all_examples = """\
@@ -9093,7 +9102,7 @@ def _doc_parms(cls):
90939102
>>> pd.Series([True, False]).all()
90949103
False
90959104
9096-
Dataframes
9105+
DataFrames
90979106
90989107
Create a dataframe from a dictionary.
90999108
@@ -9110,12 +9119,17 @@ def _doc_parms(cls):
91109119
col2 False
91119120
dtype: bool
91129121
9113-
Adding axis=1 argument will check if row-wise values all return True.
9122+
Specify ``axis='columns'`` to check if row-wise values all return True.
91149123
9115-
>>> df.all(axis=1)
9124+
>>> df.all(axis='columns')
91169125
0 True
91179126
1 False
91189127
dtype: bool
9128+
9129+
Or ``axis=None`` for whether every value is True.
9130+
9131+
>>> df.all(axis=None)
9132+
False
91199133
"""
91209134

91219135
_all_see_also = """\
@@ -9481,6 +9495,11 @@ def _doc_parms(cls):
94819495
1 False
94829496
dtype: bool
94839497
9498+
Aggregating over the entire DataFrame with ``axis=None``.
9499+
9500+
>>> df.any(axis=None)
9501+
True
9502+
94849503
`any` for an empty DataFrame is an empty Series.
94859504
94869505
>>> pd.DataFrame([]).any()
@@ -9651,22 +9670,17 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f,
96519670
@Substitution(outname=name, desc=desc, name1=name1, name2=name2,
96529671
axis_descr=axis_descr, examples=examples, see_also=see_also)
96539672
@Appender(_bool_doc)
9654-
def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
9673+
def logical_func(self, axis=0, bool_only=None, skipna=True, level=None,
96559674
**kwargs):
96569675
nv.validate_logical_func(tuple(), kwargs, fname=name)
9657-
if skipna is None:
9658-
skipna = True
9659-
if axis is None:
9660-
axis = self._stat_axis_number
96619676
if level is not None:
96629677
if bool_only is not None:
96639678
raise NotImplementedError("Option bool_only is not "
96649679
"implemented with option level.")
96659680
return self._agg_by_level(name, axis=axis, level=level,
96669681
skipna=skipna)
9667-
return self._reduce(f, axis=axis, skipna=skipna,
9668-
numeric_only=bool_only, filter_type='bool',
9669-
name=name)
9682+
return self._reduce(f, name, axis=axis, skipna=skipna,
9683+
numeric_only=bool_only, filter_type='bool')
96709684

96719685
return set_function_name(logical_func, name, cls)
96729686

pandas/core/panel.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -1143,13 +1143,26 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
11431143
raise NotImplementedError('Panel.{0} does not implement '
11441144
'numeric_only.'.format(name))
11451145

1146-
axis_name = self._get_axis_name(axis)
1147-
axis_number = self._get_axis_number(axis_name)
1146+
if axis is None and filter_type == 'bool':
1147+
# labels = None
1148+
# constructor = None
1149+
axis_number = None
1150+
axis_name = None
1151+
else:
1152+
# TODO: Make other agg func handle axis=None properly
1153+
axis = self._get_axis_number(axis)
1154+
# labels = self._get_agg_axis(axis)
1155+
# constructor = self._constructor
1156+
axis_name = self._get_axis_name(axis)
1157+
axis_number = self._get_axis_number(axis_name)
1158+
11481159
f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds)
11491160

11501161
with np.errstate(all='ignore'):
11511162
result = f(self.values)
11521163

1164+
if axis is None and filter_type == 'bool':
1165+
return np.bool_(result)
11531166
axes = self._get_plane_axes(axis_name)
11541167
if result.ndim == 2 and axis_name != self._info_axis_name:
11551168
result = result.T

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3212,7 +3212,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
32123212
delegate = self._values
32133213
if isinstance(delegate, np.ndarray):
32143214
# Validate that 'axis' is consistent with Series's single axis.
3215-
self._get_axis_number(axis)
3215+
if axis is not None:
3216+
self._get_axis_number(axis)
32163217
if numeric_only:
32173218
raise NotImplementedError('Series.{0} does not implement '
32183219
'numeric_only.'.format(name))

pandas/tests/frame/test_analytics.py

+110-9
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pandas.compat import lrange, PY35
1616
from pandas import (compat, isna, notna, DataFrame, Series,
1717
MultiIndex, date_range, Timestamp, Categorical,
18-
_np_version_under1p12, _np_version_under1p15)
18+
_np_version_under1p12)
1919
import pandas as pd
2020
import pandas.core.nanops as nanops
2121
import pandas.core.algorithms as algorithms
@@ -1139,11 +1139,35 @@ def test_any_all(self):
11391139
self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True)
11401140
self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True)
11411141

1142-
df = DataFrame(randn(10, 4)) > 0
1143-
df.any(1)
1144-
df.all(1)
1145-
df.any(1, bool_only=True)
1146-
df.all(1, bool_only=True)
1142+
def test_any_all_extra(self):
1143+
df = DataFrame({
1144+
'A': [True, False, False],
1145+
'B': [True, True, False],
1146+
'C': [True, True, True],
1147+
}, index=['a', 'b', 'c'])
1148+
result = df[['A', 'B']].any(1)
1149+
expected = Series([True, True, False], index=['a', 'b', 'c'])
1150+
tm.assert_series_equal(result, expected)
1151+
1152+
result = df[['A', 'B']].any(1, bool_only=True)
1153+
tm.assert_series_equal(result, expected)
1154+
1155+
result = df.all(1)
1156+
expected = Series([True, False, False], index=['a', 'b', 'c'])
1157+
tm.assert_series_equal(result, expected)
1158+
1159+
result = df.all(1, bool_only=True)
1160+
tm.assert_series_equal(result, expected)
1161+
1162+
# Axis is None
1163+
result = df.all(axis=None).item()
1164+
assert result is False
1165+
1166+
result = df.any(axis=None).item()
1167+
assert result is True
1168+
1169+
result = df[['C']].all(axis=None).item()
1170+
assert result is True
11471171

11481172
# skip pathological failure cases
11491173
# class CantNonzero(object):
@@ -1165,6 +1189,86 @@ def test_any_all(self):
11651189
# df.any(1, bool_only=True)
11661190
# df.all(1, bool_only=True)
11671191

1192+
@pytest.mark.parametrize('func, data, expected', [
1193+
(np.any, {}, False),
1194+
(np.all, {}, True),
1195+
(np.any, {'A': []}, False),
1196+
(np.all, {'A': []}, True),
1197+
(np.any, {'A': [False, False]}, False),
1198+
(np.all, {'A': [False, False]}, False),
1199+
(np.any, {'A': [True, False]}, True),
1200+
(np.all, {'A': [True, False]}, False),
1201+
(np.any, {'A': [True, True]}, True),
1202+
(np.all, {'A': [True, True]}, True),
1203+
1204+
(np.any, {'A': [False], 'B': [False]}, False),
1205+
(np.all, {'A': [False], 'B': [False]}, False),
1206+
1207+
(np.any, {'A': [False, False], 'B': [False, True]}, True),
1208+
(np.all, {'A': [False, False], 'B': [False, True]}, False),
1209+
1210+
# other types
1211+
(np.all, {'A': pd.Series([0.0, 1.0], dtype='float')}, False),
1212+
(np.any, {'A': pd.Series([0.0, 1.0], dtype='float')}, True),
1213+
(np.all, {'A': pd.Series([0, 1], dtype=int)}, False),
1214+
(np.any, {'A': pd.Series([0, 1], dtype=int)}, True),
1215+
pytest.param(np.all, {'A': pd.Series([0, 1], dtype='M8[ns]')}, False,
1216+
marks=[td.skip_if_np_lt_115]),
1217+
pytest.param(np.any, {'A': pd.Series([0, 1], dtype='M8[ns]')}, True,
1218+
marks=[td.skip_if_np_lt_115]),
1219+
pytest.param(np.all, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True,
1220+
marks=[td.skip_if_np_lt_115]),
1221+
pytest.param(np.any, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True,
1222+
marks=[td.skip_if_np_lt_115]),
1223+
pytest.param(np.all, {'A': pd.Series([0, 1], dtype='m8[ns]')}, False,
1224+
marks=[td.skip_if_np_lt_115]),
1225+
pytest.param(np.any, {'A': pd.Series([0, 1], dtype='m8[ns]')}, True,
1226+
marks=[td.skip_if_np_lt_115]),
1227+
pytest.param(np.all, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True,
1228+
marks=[td.skip_if_np_lt_115]),
1229+
pytest.param(np.any, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True,
1230+
marks=[td.skip_if_np_lt_115]),
1231+
(np.all, {'A': pd.Series([0, 1], dtype='category')}, False),
1232+
(np.any, {'A': pd.Series([0, 1], dtype='category')}, True),
1233+
(np.all, {'A': pd.Series([1, 2], dtype='category')}, True),
1234+
(np.any, {'A': pd.Series([1, 2], dtype='category')}, True),
1235+
1236+
# # Mix
1237+
# GH-21484
1238+
# (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'),
1239+
# 'B': pd.Series([10, 20], dtype='m8[ns]')}, True),
1240+
])
1241+
def test_any_all_np_func(self, func, data, expected):
1242+
# https://github.com/pandas-dev/pandas/issues/19976
1243+
data = DataFrame(data)
1244+
result = func(data)
1245+
assert isinstance(result, np.bool_)
1246+
assert result.item() is expected
1247+
1248+
# method version
1249+
result = getattr(DataFrame(data), func.__name__)(axis=None)
1250+
assert isinstance(result, np.bool_)
1251+
assert result.item() is expected
1252+
1253+
def test_any_all_object(self):
1254+
# https://github.com/pandas-dev/pandas/issues/19976
1255+
result = np.all(DataFrame(columns=['a', 'b'])).item()
1256+
assert result is True
1257+
1258+
result = np.any(DataFrame(columns=['a', 'b'])).item()
1259+
assert result is False
1260+
1261+
@pytest.mark.parametrize('method', ['any', 'all'])
1262+
def test_any_all_level_axis_none_raises(self, method):
1263+
df = DataFrame(
1264+
{"A": 1},
1265+
index=MultiIndex.from_product([['A', 'B'], ['a', 'b']],
1266+
names=['out', 'in'])
1267+
)
1268+
xpr = "Must specify 'axis' when aggregating by level."
1269+
with tm.assert_raises_regex(ValueError, xpr):
1270+
getattr(df, method)(axis=None, level='out')
1271+
11681272
def _check_bool_op(self, name, alternative, frame=None, has_skipna=True,
11691273
has_bool_only=False):
11701274
if frame is None:
@@ -2071,9 +2175,6 @@ def test_clip_against_list_like(self, inplace, lower, axis, res):
20712175
result = original
20722176
tm.assert_frame_equal(result, expected, check_exact=True)
20732177

2074-
@pytest.mark.xfail(
2075-
not _np_version_under1p15,
2076-
reason="failing under numpy-dev gh-19976")
20772178
@pytest.mark.parametrize("axis", [0, 1, None])
20782179
def test_clip_against_frame(self, axis):
20792180
df = DataFrame(np.random.randn(1000, 2))

pandas/tests/test_panel.py

+7
Original file line numberDiff line numberDiff line change
@@ -2717,3 +2717,10 @@ def test_panel_index():
27172717
np.repeat([1, 2, 3], 4)],
27182718
names=['time', 'panel'])
27192719
tm.assert_index_equal(index, expected)
2720+
2721+
2722+
def test_panel_np_all():
2723+
with catch_warnings(record=True):
2724+
wp = Panel({"A": DataFrame({'b': [1, 2]})})
2725+
result = np.all(wp)
2726+
assert result == np.bool_(True)

0 commit comments

Comments
 (0)