Skip to content

Commit f7ed7f8

Browse files
TomAugspurger authored and jorisvandenbossche committed
API/COMPAT: support axis=None for logical reduction (reduce over all axes) (#21486)
* Compat with NumPy 1.15 logical func * Accepts axis=None as reduce all dims
1 parent 1033e8b commit f7ed7f8

File tree

8 files changed

+215
-31
lines changed

8 files changed

+215
-31
lines changed

doc/source/whatsnew/v0.23.2.txt

+30
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,36 @@ and bug fixes. We recommend that all users upgrade to this version.
1616
:local:
1717
:backlinks: none
1818

19+
.. _whatsnew_0232.enhancements:
20+
21+
Logical Reductions over Entire DataFrame
22+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23+
24+
:meth:`DataFrame.all` and :meth:`DataFrame.any` now accept ``axis=None`` to reduce over all axes to a scalar (:issue:`19976`)
25+
26+
.. ipython:: python
27+
28+
df = pd.DataFrame({"A": [1, 2], "B": [True, False]})
29+
df.all(axis=None)
30+
31+
32+
This also provides compatibility with NumPy 1.15, which now dispatches to ``DataFrame.all``.
33+
With NumPy 1.15 and pandas 0.23.1 or earlier, :func:`numpy.all` and :func:`numpy.any` will no longer reduce over every axis:
34+
35+
.. code-block:: python
36+
37+
>>> # NumPy 1.15, pandas 0.23.1
38+
>>> np.any(pd.DataFrame({"A": [False], "B": [False]}))
39+
A False
40+
B False
41+
dtype: bool
42+
43+
With pandas 0.23.2, that will correctly return False, as it did with NumPy < 1.15.
44+
45+
.. ipython:: python
46+
47+
np.any(pd.DataFrame({"A": [False], "B": [False]}))
48+
1949

2050
.. _whatsnew_0232.fixed_regressions:
2151

pandas/core/frame.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -6846,13 +6846,18 @@ def _count_level(self, level, axis=0, numeric_only=False):
68466846

68476847
def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
68486848
filter_type=None, **kwds):
6849-
axis = self._get_axis_number(axis)
6849+
if axis is None and filter_type == 'bool':
6850+
labels = None
6851+
constructor = None
6852+
else:
6853+
# TODO: Make other agg func handle axis=None properly
6854+
axis = self._get_axis_number(axis)
6855+
labels = self._get_agg_axis(axis)
6856+
constructor = self._constructor
68506857

68516858
def f(x):
68526859
return op(x, axis=axis, skipna=skipna, **kwds)
68536860

6854-
labels = self._get_agg_axis(axis)
6855-
68566861
# exclude timedelta/datetime unless we are uniform types
68576862
if axis == 1 and self._is_mixed_type and self._is_datelike_mixed_type:
68586863
numeric_only = True
@@ -6861,6 +6866,13 @@ def f(x):
68616866
try:
68626867
values = self.values
68636868
result = f(values)
6869+
6870+
if (filter_type == 'bool' and is_object_dtype(values) and
6871+
axis is None):
6872+
# work around https://github.com/numpy/numpy/issues/10489
6873+
# TODO: combine with hasattr(result, 'dtype') further down
6874+
# hard since we don't have `values` down there.
6875+
result = np.bool_(result)
68646876
except Exception as e:
68656877

68666878
# try by-column first
@@ -6927,7 +6939,9 @@ def f(x):
69276939
if axis == 0:
69286940
result = coerce_to_dtypes(result, self.dtypes)
69296941

6930-
return Series(result, index=labels)
6942+
if constructor is not None:
6943+
result = Series(result, index=labels)
6944+
return result
69316945

69326946
def nunique(self, axis=0, dropna=True):
69336947
"""

pandas/core/generic.py

+29-15
Original file line numberDiff line numberDiff line change
@@ -8728,6 +8728,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
87288728
return rs
87298729

87308730
def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
8731+
if axis is None:
8732+
raise ValueError("Must specify 'axis' when aggregating by level.")
87318733
grouped = self.groupby(level=level, axis=axis, sort=False)
87328734
if hasattr(grouped, name) and skipna:
87338735
return getattr(grouped, name)(**kwargs)
@@ -9054,8 +9056,15 @@ def _doc_parms(cls):
90549056
90559057
Parameters
90569058
----------
9057-
axis : int, default 0
9058-
Select the axis which can be 0 for indices and 1 for columns.
9059+
axis : {0 or 'index', 1 or 'columns', None}, default 0
9060+
Indicate which axis or axes should be reduced.
9061+
9062+
* 0 / 'index' : reduce the index, return a Series whose index is the
9063+
original column labels.
9064+
* 1 / 'columns' : reduce the columns, return a Series whose index is the
9065+
original index.
9066+
* None : reduce all axes, return a scalar.
9067+
90599068
skipna : boolean, default True
90609069
Exclude NA/null values. If an entire row/column is NA, the result
90619070
will be NA.
@@ -9077,9 +9086,9 @@ def _doc_parms(cls):
90779086
%(examples)s"""
90789087

90799088
_all_doc = """\
9080-
Return whether all elements are True over series or dataframe axis.
9089+
Return whether all elements are True, potentially over an axis.
90819090
9082-
Returns True if all elements within a series or along a dataframe
9091+
Returns True if all elements within a series or along a DataFrame
90839092
axis are non-zero, not-empty or not-False."""
90849093

90859094
_all_examples = """\
@@ -9092,7 +9101,7 @@ def _doc_parms(cls):
90929101
>>> pd.Series([True, False]).all()
90939102
False
90949103
9095-
Dataframes
9104+
DataFrames
90969105
90979106
Create a dataframe from a dictionary.
90989107
@@ -9109,12 +9118,17 @@ def _doc_parms(cls):
91099118
col2 False
91109119
dtype: bool
91119120
9112-
Adding axis=1 argument will check if row-wise values all return True.
9121+
Specify ``axis='columns'`` to check if row-wise values all return True.
91139122
9114-
>>> df.all(axis=1)
9123+
>>> df.all(axis='columns')
91159124
0 True
91169125
1 False
91179126
dtype: bool
9127+
9128+
Or ``axis=None`` for whether every value is True.
9129+
9130+
>>> df.all(axis=None)
9131+
False
91189132
"""
91199133

91209134
_all_see_also = """\
@@ -9484,6 +9498,11 @@ def _doc_parms(cls):
94849498
1 False
94859499
dtype: bool
94869500
9501+
Aggregating over the entire DataFrame with ``axis=None``.
9502+
9503+
>>> df.any(axis=None)
9504+
True
9505+
94879506
`any` for an empty DataFrame is an empty Series.
94889507
94899508
>>> pd.DataFrame([]).any()
@@ -9654,22 +9673,17 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f,
96549673
@Substitution(outname=name, desc=desc, name1=name1, name2=name2,
96559674
axis_descr=axis_descr, examples=examples, see_also=see_also)
96569675
@Appender(_bool_doc)
9657-
def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
9676+
def logical_func(self, axis=0, bool_only=None, skipna=True, level=None,
96589677
**kwargs):
96599678
nv.validate_logical_func(tuple(), kwargs, fname=name)
9660-
if skipna is None:
9661-
skipna = True
9662-
if axis is None:
9663-
axis = self._stat_axis_number
96649679
if level is not None:
96659680
if bool_only is not None:
96669681
raise NotImplementedError("Option bool_only is not "
96679682
"implemented with option level.")
96689683
return self._agg_by_level(name, axis=axis, level=level,
96699684
skipna=skipna)
9670-
return self._reduce(f, axis=axis, skipna=skipna,
9671-
numeric_only=bool_only, filter_type='bool',
9672-
name=name)
9685+
return self._reduce(f, name, axis=axis, skipna=skipna,
9686+
numeric_only=bool_only, filter_type='bool')
96739687

96749688
return set_function_name(logical_func, name, cls)
96759689

pandas/core/panel.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -1143,13 +1143,26 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
11431143
raise NotImplementedError('Panel.{0} does not implement '
11441144
'numeric_only.'.format(name))
11451145

1146-
axis_name = self._get_axis_name(axis)
1147-
axis_number = self._get_axis_number(axis_name)
1146+
if axis is None and filter_type == 'bool':
1147+
# labels = None
1148+
# constructor = None
1149+
axis_number = None
1150+
axis_name = None
1151+
else:
1152+
# TODO: Make other agg func handle axis=None properly
1153+
axis = self._get_axis_number(axis)
1154+
# labels = self._get_agg_axis(axis)
1155+
# constructor = self._constructor
1156+
axis_name = self._get_axis_name(axis)
1157+
axis_number = self._get_axis_number(axis_name)
1158+
11481159
f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds)
11491160

11501161
with np.errstate(all='ignore'):
11511162
result = f(self.values)
11521163

1164+
if axis is None and filter_type == 'bool':
1165+
return np.bool_(result)
11531166
axes = self._get_plane_axes(axis_name)
11541167
if result.ndim == 2 and axis_name != self._info_axis_name:
11551168
result = result.T

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3241,7 +3241,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
32413241
delegate = self._values
32423242
if isinstance(delegate, np.ndarray):
32433243
# Validate that 'axis' is consistent with Series's single axis.
3244-
self._get_axis_number(axis)
3244+
if axis is not None:
3245+
self._get_axis_number(axis)
32453246
if numeric_only:
32463247
raise NotImplementedError('Series.{0} does not implement '
32473248
'numeric_only.'.format(name))

pandas/tests/frame/test_analytics.py

+110-9
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pandas.compat import lrange, PY35
1616
from pandas import (compat, isna, notna, DataFrame, Series,
1717
MultiIndex, date_range, Timestamp, Categorical,
18-
_np_version_under1p12, _np_version_under1p15,
18+
_np_version_under1p12,
1919
to_datetime, to_timedelta)
2020
import pandas as pd
2121
import pandas.core.nanops as nanops
@@ -1159,11 +1159,35 @@ def test_any_all(self):
11591159
self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True)
11601160
self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True)
11611161

1162-
df = DataFrame(randn(10, 4)) > 0
1163-
df.any(1)
1164-
df.all(1)
1165-
df.any(1, bool_only=True)
1166-
df.all(1, bool_only=True)
1162+
def test_any_all_extra(self):
1163+
df = DataFrame({
1164+
'A': [True, False, False],
1165+
'B': [True, True, False],
1166+
'C': [True, True, True],
1167+
}, index=['a', 'b', 'c'])
1168+
result = df[['A', 'B']].any(1)
1169+
expected = Series([True, True, False], index=['a', 'b', 'c'])
1170+
tm.assert_series_equal(result, expected)
1171+
1172+
result = df[['A', 'B']].any(1, bool_only=True)
1173+
tm.assert_series_equal(result, expected)
1174+
1175+
result = df.all(1)
1176+
expected = Series([True, False, False], index=['a', 'b', 'c'])
1177+
tm.assert_series_equal(result, expected)
1178+
1179+
result = df.all(1, bool_only=True)
1180+
tm.assert_series_equal(result, expected)
1181+
1182+
# Axis is None
1183+
result = df.all(axis=None).item()
1184+
assert result is False
1185+
1186+
result = df.any(axis=None).item()
1187+
assert result is True
1188+
1189+
result = df[['C']].all(axis=None).item()
1190+
assert result is True
11671191

11681192
# skip pathological failure cases
11691193
# class CantNonzero(object):
@@ -1185,6 +1209,86 @@ def test_any_all(self):
11851209
# df.any(1, bool_only=True)
11861210
# df.all(1, bool_only=True)
11871211

1212+
@pytest.mark.parametrize('func, data, expected', [
1213+
(np.any, {}, False),
1214+
(np.all, {}, True),
1215+
(np.any, {'A': []}, False),
1216+
(np.all, {'A': []}, True),
1217+
(np.any, {'A': [False, False]}, False),
1218+
(np.all, {'A': [False, False]}, False),
1219+
(np.any, {'A': [True, False]}, True),
1220+
(np.all, {'A': [True, False]}, False),
1221+
(np.any, {'A': [True, True]}, True),
1222+
(np.all, {'A': [True, True]}, True),
1223+
1224+
(np.any, {'A': [False], 'B': [False]}, False),
1225+
(np.all, {'A': [False], 'B': [False]}, False),
1226+
1227+
(np.any, {'A': [False, False], 'B': [False, True]}, True),
1228+
(np.all, {'A': [False, False], 'B': [False, True]}, False),
1229+
1230+
# other types
1231+
(np.all, {'A': pd.Series([0.0, 1.0], dtype='float')}, False),
1232+
(np.any, {'A': pd.Series([0.0, 1.0], dtype='float')}, True),
1233+
(np.all, {'A': pd.Series([0, 1], dtype=int)}, False),
1234+
(np.any, {'A': pd.Series([0, 1], dtype=int)}, True),
1235+
pytest.param(np.all, {'A': pd.Series([0, 1], dtype='M8[ns]')}, False,
1236+
marks=[td.skip_if_np_lt_115]),
1237+
pytest.param(np.any, {'A': pd.Series([0, 1], dtype='M8[ns]')}, True,
1238+
marks=[td.skip_if_np_lt_115]),
1239+
pytest.param(np.all, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True,
1240+
marks=[td.skip_if_np_lt_115]),
1241+
pytest.param(np.any, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True,
1242+
marks=[td.skip_if_np_lt_115]),
1243+
pytest.param(np.all, {'A': pd.Series([0, 1], dtype='m8[ns]')}, False,
1244+
marks=[td.skip_if_np_lt_115]),
1245+
pytest.param(np.any, {'A': pd.Series([0, 1], dtype='m8[ns]')}, True,
1246+
marks=[td.skip_if_np_lt_115]),
1247+
pytest.param(np.all, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True,
1248+
marks=[td.skip_if_np_lt_115]),
1249+
pytest.param(np.any, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True,
1250+
marks=[td.skip_if_np_lt_115]),
1251+
(np.all, {'A': pd.Series([0, 1], dtype='category')}, False),
1252+
(np.any, {'A': pd.Series([0, 1], dtype='category')}, True),
1253+
(np.all, {'A': pd.Series([1, 2], dtype='category')}, True),
1254+
(np.any, {'A': pd.Series([1, 2], dtype='category')}, True),
1255+
1256+
# # Mix
1257+
# GH-21484
1258+
# (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'),
1259+
# 'B': pd.Series([10, 20], dtype='m8[ns]')}, True),
1260+
])
1261+
def test_any_all_np_func(self, func, data, expected):
1262+
# https://github.com/pandas-dev/pandas/issues/19976
1263+
data = DataFrame(data)
1264+
result = func(data)
1265+
assert isinstance(result, np.bool_)
1266+
assert result.item() is expected
1267+
1268+
# method version
1269+
result = getattr(DataFrame(data), func.__name__)(axis=None)
1270+
assert isinstance(result, np.bool_)
1271+
assert result.item() is expected
1272+
1273+
def test_any_all_object(self):
1274+
# https://github.com/pandas-dev/pandas/issues/19976
1275+
result = np.all(DataFrame(columns=['a', 'b'])).item()
1276+
assert result is True
1277+
1278+
result = np.any(DataFrame(columns=['a', 'b'])).item()
1279+
assert result is False
1280+
1281+
@pytest.mark.parametrize('method', ['any', 'all'])
1282+
def test_any_all_level_axis_none_raises(self, method):
1283+
df = DataFrame(
1284+
{"A": 1},
1285+
index=MultiIndex.from_product([['A', 'B'], ['a', 'b']],
1286+
names=['out', 'in'])
1287+
)
1288+
xpr = "Must specify 'axis' when aggregating by level."
1289+
with tm.assert_raises_regex(ValueError, xpr):
1290+
getattr(df, method)(axis=None, level='out')
1291+
11881292
def _check_bool_op(self, name, alternative, frame=None, has_skipna=True,
11891293
has_bool_only=False):
11901294
if frame is None:
@@ -2091,9 +2195,6 @@ def test_clip_against_list_like(self, inplace, lower, axis, res):
20912195
result = original
20922196
tm.assert_frame_equal(result, expected, check_exact=True)
20932197

2094-
@pytest.mark.xfail(
2095-
not _np_version_under1p15,
2096-
reason="failing under numpy-dev gh-19976")
20972198
@pytest.mark.parametrize("axis", [0, 1, None])
20982199
def test_clip_against_frame(self, axis):
20992200
df = DataFrame(np.random.randn(1000, 2))

pandas/tests/test_panel.py

+7
Original file line numberDiff line numberDiff line change
@@ -2707,3 +2707,10 @@ def test_panel_index():
27072707
np.repeat([1, 2, 3], 4)],
27082708
names=['time', 'panel'])
27092709
tm.assert_index_equal(index, expected)
2710+
2711+
2712+
def test_panel_np_all():
2713+
with catch_warnings(record=True):
2714+
wp = Panel({"A": DataFrame({'b': [1, 2]})})
2715+
result = np.all(wp)
2716+
assert result == np.bool_(True)

0 commit comments

Comments
 (0)