Skip to content

Commit fd3e205

Browse files
authored
BUG: any/all not returning booleans for object type (#41102)
1 parent f40e58c commit fd3e205

File tree

6 files changed

+66
-7
lines changed

6 files changed

+66
-7
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ Numeric
726726
- Bug in :meth:`DataFrame.transform` would raise ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`)
727727
- Bug in :meth:`DataFrameGroupBy.rank` giving incorrect results with ``pct=True`` and equal values between consecutive groups (:issue:`40518`)
728728
- Bug in :meth:`Series.count` would result in an ``int32`` result on 32-bit platforms when argument ``level=None`` (:issue:`40908`)
729+
- Bug in :class:`Series` and :class:`DataFrame` reductions with methods ``any`` and ``all`` not returning boolean results for object data (:issue:`12863`, :issue:`35450`, :issue:`27709`)
729730
- Bug in :meth:`Series.clip` would fail if series contains NA values and has nullable int or float as a data type (:issue:`40851`)
730731

731732
Conversion

pandas/core/nanops.py

+12
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,12 @@ def nanany(
486486
False
487487
"""
488488
values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask)
489+
490+
# For object type, any won't necessarily return
491+
# boolean values (numpy/numpy#4352)
492+
if is_object_dtype(values):
493+
values = values.astype(bool)
494+
489495
# error: Incompatible return value type (got "Union[bool_, ndarray]", expected
490496
# "bool")
491497
return values.any(axis) # type: ignore[return-value]
@@ -526,6 +532,12 @@ def nanall(
526532
False
527533
"""
528534
values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask)
535+
536+
# For object type, all won't necessarily return
537+
# boolean values (numpy/numpy#4352)
538+
if is_object_dtype(values):
539+
values = values.astype(bool)
540+
529541
# error: Incompatible return value type (got "Union[bool_, ndarray]", expected
530542
# "bool")
531543
return values.all(axis) # type: ignore[return-value]

pandas/tests/apply/test_series_apply.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -441,8 +441,8 @@ def test_non_callable_aggregates(how):
441441
("sum", "abc"),
442442
("max", "c"),
443443
("min", "a"),
444-
("all", "c"), # see GH12863
445-
("any", "a"),
444+
("all", True),
445+
("any", True),
446446
],
447447
),
448448
),

pandas/tests/frame/test_reductions.py

+25-3
Original file line numberDiff line numberDiff line change
@@ -1068,13 +1068,17 @@ def test_idxmax_dt64_multicolumn_axis1(self):
10681068

10691069
@pytest.mark.parametrize("opname", ["any", "all"])
10701070
def test_any_all(self, opname, bool_frame_with_na, float_string_frame):
1071-
assert_bool_op_calc(
1072-
opname, getattr(np, opname), bool_frame_with_na, has_skipna=True
1073-
)
10741071
assert_bool_op_api(
10751072
opname, bool_frame_with_na, float_string_frame, has_bool_only=True
10761073
)
10771074

1075+
@pytest.mark.parametrize("opname", ["any", "all"])
1076+
def test_any_all_bool_frame(self, opname, bool_frame_with_na):
1077+
# GH#12863: numpy gives back non-boolean data for object type
1078+
# so fill NaNs to compare with pandas behavior
1079+
df = bool_frame_with_na.fillna(True)
1080+
assert_bool_op_calc(opname, getattr(np, opname), df, has_skipna=True)
1081+
10781082
def test_any_all_extra(self):
10791083
df = DataFrame(
10801084
{
@@ -1108,6 +1112,24 @@ def test_any_all_extra(self):
11081112
result = df[["C"]].all(axis=None).item()
11091113
assert result is True
11101114

1115+
@pytest.mark.parametrize("axis", [0, 1])
1116+
@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
1117+
@pytest.mark.parametrize("skipna", [True, False])
1118+
def test_any_all_object_dtype(self, axis, bool_agg_func, skipna):
1119+
# GH#35450
1120+
df = DataFrame(
1121+
data=[
1122+
[1, np.nan, np.nan, True],
1123+
[np.nan, 2, np.nan, True],
1124+
[np.nan, np.nan, np.nan, True],
1125+
[np.nan, np.nan, "5", np.nan],
1126+
]
1127+
)
1128+
1129+
result = getattr(df, bool_agg_func)(axis=axis, skipna=skipna)
1130+
expected = Series([True, True, True, True])
1131+
tm.assert_series_equal(result, expected)
1132+
11111133
def test_any_datetime(self):
11121134

11131135
# GH 23070

pandas/tests/reductions/test_reductions.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -896,7 +896,7 @@ def test_all_any(self):
896896

897897
# Alternative types, with implicit 'object' dtype.
898898
s = Series(["abc", True])
899-
assert "abc" == s.any() # 'abc' || True => 'abc'
899+
assert s.any()
900900

901901
@pytest.mark.parametrize("klass", [Index, Series])
902902
def test_numpy_all_any(self, klass):
@@ -913,7 +913,7 @@ def test_all_any_params(self):
913913
s2 = Series([np.nan, False])
914914
assert s1.all(skipna=False) # nan && True => True
915915
assert s1.all(skipna=True)
916-
assert np.isnan(s2.any(skipna=False)) # nan || False => nan
916+
assert s2.any(skipna=False)
917917
assert not s2.any(skipna=True)
918918

919919
# Check level.
@@ -941,6 +941,29 @@ def test_all_any_params(self):
941941
with pytest.raises(NotImplementedError, match=msg):
942942
s.all(bool_only=True)
943943

944+
@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
945+
@pytest.mark.parametrize("skipna", [True, False])
946+
def test_any_all_object_dtype(self, bool_agg_func, skipna):
947+
# GH#12863
948+
ser = Series(["a", "b", "c", "d", "e"], dtype=object)
949+
result = getattr(ser, bool_agg_func)(skipna=skipna)
950+
expected = True
951+
952+
assert result == expected
953+
954+
@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
955+
@pytest.mark.parametrize(
956+
"data", [[False, None], [None, False], [False, np.nan], [np.nan, False]]
957+
)
958+
def test_any_all_object_dtype_missing(self, data, bool_agg_func):
959+
# GH#27709
960+
ser = Series(data)
961+
result = getattr(ser, bool_agg_func)(skipna=False)
962+
963+
# None is treated as False, but np.nan is treated as True
964+
expected = bool_agg_func == "any" and None not in data
965+
assert result == expected
966+
944967
@pytest.mark.parametrize("bool_agg_func", ["any", "all"])
945968
@pytest.mark.parametrize("skipna", [True, False])
946969
@pytest.mark.parametrize(

pandas/tests/test_nanops.py

+1
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
270270
value = value.astype("f8")
271271
return func(value, **kwargs)
272272

273+
@pytest.mark.xfail(reason="GH12863: numpy result won't match for object type")
273274
@pytest.mark.parametrize(
274275
"nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)]
275276
)

0 commit comments

Comments
 (0)