Skip to content

Commit 1f16f49

Browse files
authored
BUG: frame.any/all with bool_only=True and Categorical[bool] (pandas-dev#40477)
1 parent 3b60759 commit 1f16f49

File tree

4 files changed

+40
-18
lines changed

4 files changed

+40
-18
lines changed

pandas/core/dtypes/inference.py

+26
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99

1010
from pandas._libs import lib
11+
from pandas._typing import ArrayLike
1112

1213
is_bool = lib.is_bool
1314

@@ -420,3 +421,28 @@ def is_dataclass(item):
420421
return is_dataclass(item) and not isinstance(item, type)
421422
except ImportError:
422423
return False
424+
425+
426+
def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
    """
    Check if this is a ndarray[bool] or an ndarray[object] of bool objects.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray

    Returns
    -------
    bool

    Notes
    -----
    This does not include the special treatment is_bool_dtype uses for
    Categorical. More generally, anything that is not an np.ndarray
    (e.g. any ExtensionArray) is reported as not-bool.
    """
    # Be explicit that only ndarrays qualify, rather than relying on
    # ``dtype == np.dtype(...)`` evaluating to False for extension dtypes.
    if not isinstance(arr, np.ndarray):
        return False

    dtype = arr.dtype
    if dtype == np.dtype(bool):
        return True
    if dtype == np.dtype("object"):
        # Flatten first so that every element of a multi-dimensional
        # object array is inspected.
        return lib.is_bool_array(arr.ravel("K"))
    return False

pandas/core/internals/array_manager.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
)
3434
from pandas.core.dtypes.common import (
3535
ensure_int64,
36-
is_bool_dtype,
3736
is_datetime64_ns_dtype,
3837
is_dtype_equal,
3938
is_extension_array_dtype,
@@ -50,6 +49,7 @@
5049
ABCPandasArray,
5150
ABCSeries,
5251
)
52+
from pandas.core.dtypes.inference import is_inferred_bool_dtype
5353
from pandas.core.dtypes.missing import (
5454
array_equals,
5555
isna,
@@ -676,10 +676,7 @@ def get_bool_data(self, copy: bool = False) -> ArrayManager:
676676
copy : bool, default False
677677
Whether to copy the blocks
678678
"""
679-
return self._get_data_subset(
680-
lambda arr: is_bool_dtype(arr.dtype)
681-
or (is_object_dtype(arr.dtype) and lib.is_bool_array(arr))
682-
)
679+
return self._get_data_subset(is_inferred_bool_dtype)
683680

684681
def get_numeric_data(self, copy: bool = False) -> ArrayManager:
685682
"""

pandas/core/internals/blocks.py

+9-13
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
ABCPandasArray,
7070
ABCSeries,
7171
)
72+
from pandas.core.dtypes.inference import is_inferred_bool_dtype
7273
from pandas.core.dtypes.missing import (
7374
is_valid_na_for_dtype,
7475
isna,
@@ -158,7 +159,6 @@ class Block(libinternals.Block, PandasObject):
158159

159160
__slots__ = ()
160161
is_numeric = False
161-
is_bool = False
162162
is_object = False
163163
is_extension = False
164164
_can_consolidate = True
@@ -199,6 +199,14 @@ def is_categorical(self) -> bool:
199199
)
200200
return isinstance(self.values, Categorical)
201201

202+
@final
@property
def is_bool(self) -> bool:
    """
    Whether this block holds boolean data: either its values have bool
    dtype, or they are object dtype containing only bool objects.
    """
    values = self.values
    return is_inferred_bool_dtype(values)
209+
202210
@final
203211
def external_values(self):
204212
return external_values(self.values)
@@ -1741,10 +1749,6 @@ def _can_hold_element(self, element: Any) -> bool:
17411749
# "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
17421750
return can_hold_element(self.dtype, element) # type: ignore[arg-type]
17431751

1744-
@property
1745-
def is_bool(self):
1746-
return self.dtype.kind == "b"
1747-
17481752

17491753
class NDArrayBackedExtensionBlock(HybridMixin, Block):
17501754
"""
@@ -1907,14 +1911,6 @@ class ObjectBlock(Block):
19071911

19081912
values: np.ndarray
19091913

1910-
@property
1911-
def is_bool(self):
1912-
"""
1913-
we can be a bool if we have only bool values but are of type
1914-
object
1915-
"""
1916-
return lib.is_bool_array(self.values.ravel("K"))
1917-
19181914
@maybe_split
19191915
def reduce(self, func, ignore_failures: bool = False) -> List[Block]:
19201916
"""

pandas/tests/frame/test_reductions.py

+3
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,9 @@ def test_any_all_object_bool_only(self):
11631163
df._consolidate_inplace()
11641164
df["C"] = Series([True, True])
11651165

1166+
# Categorical of bools is _not_ considered booly
1167+
df["D"] = df["C"].astype("category")
1168+
11661169
# The underlying bug is in DataFrame._get_bool_data, so we check
11671170
# that while we're here
11681171
res = df._get_bool_data()

0 commit comments

Comments
 (0)