diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c3355757350b9..b8c0161d47a0d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -235,6 +235,7 @@ Deprecations - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`) - Deprecated the "fastpath" keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`) +- Deprecated the behavior of :func:`is_bool_dtype` returning ``True`` for object-dtype :class:`Index` of bool objects (:issue:`52680`) - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`) - Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 20d8cb4adc8b6..dcf4c25f14e2f 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1219,7 +1219,18 @@ def is_bool_dtype(arr_or_dtype) -> bool: if isinstance(arr_or_dtype, ABCIndex): # Allow Index[object] that is all-bools or Index["boolean"] - return arr_or_dtype.inferred_type == "boolean" + if arr_or_dtype.inferred_type == "boolean": + if not is_bool_dtype(arr_or_dtype.dtype): + # GH#52680 + warnings.warn( + "The behavior of is_bool_dtype with an object-dtype Index " + "of bool objects is deprecated. In a future version, " + "this will return False. Cast the Index to a bool dtype instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return True + return False elif isinstance(dtype, ExtensionDtype): return getattr(dtype, "_is_boolean", False) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0a56fa4d031d6..cd9681f22b247 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -926,7 +926,13 @@ def __array_wrap__(self, result, context=None): Gets called after a ufunc and other functions e.g. np.split. """ result = lib.item_from_zerodim(result) - if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1: + if ( + (not isinstance(result, Index) and is_bool_dtype(result)) + or lib.is_scalar(result) + or np.ndim(result) > 1 + ): + # exclude Index to avoid warning from is_bool_dtype deprecation; + # in the Index case it doesn't matter which path we go down. return result return Index(result, name=self.name) @@ -6107,8 +6113,12 @@ def _should_compare(self, other: Index) -> bool: Check if `self == other` can ever have non-False entries. """ - if (is_bool_dtype(other) and is_any_real_numeric_dtype(self)) or ( - is_bool_dtype(self) and is_any_real_numeric_dtype(other) + # NB: we use inferred_type rather than is_bool_dtype to catch + # object_dtype_of_bool and categorical[object_dtype_of_bool] cases + if ( + other.inferred_type == "boolean" and is_any_real_numeric_dtype(self.dtype) + ) or ( + self.inferred_type == "boolean" and is_any_real_numeric_dtype(other.dtype) ): # GH#16877 Treat boolean labels passed to a numeric index as not # found. Without this fix False and True would be treated as 0 and 1 diff --git a/pandas/tests/base/common.py b/pandas/tests/base/common.py index 30cf9311c98bc..ad0b394105742 100644 --- a/pandas/tests/base/common.py +++ b/pandas/tests/base/common.py @@ -1,10 +1,9 @@ from typing import Any from pandas import Index -from pandas.api.types import is_bool_dtype def allow_na_ops(obj: Any) -> bool: """Whether to skip test cases including NaN""" - is_bool_index = isinstance(obj, Index) and is_bool_dtype(obj) + is_bool_index = isinstance(obj, Index) and obj.inferred_type == "boolean" return not is_bool_index and obj._can_hold_na diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 6a18bc4aaf856..80fa78ccdc97e 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -21,7 +21,6 @@ ) import pandas._testing as tm from pandas.api.types import ( - is_bool_dtype, is_signed_integer_dtype, pandas_dtype, ) @@ -242,7 +241,7 @@ def test_union_base(self, index): def test_difference_base(self, sort, index): first = index[2:] second = index[:4] - if is_bool_dtype(index): + if index.inferred_type == "boolean": # i think (TODO: be sure) there assumptions baked in about # the index fixture that don't hold here? answer = set(first).difference(set(second)) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3fb1fadeed122..3f0d530a1cbd4 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -37,10 +37,7 @@ to_timedelta, ) import pandas._testing as tm -from pandas.api.types import ( - is_bool_dtype, - is_scalar, -) +from pandas.api.types import is_scalar from pandas.core.indexing import _one_ellipsis_message from pandas.tests.indexing.common import check_indexing_smoketest_or_raises @@ -1657,7 +1654,7 @@ def test_loc_iloc_getitem_leading_ellipses(self, series_with_simple_index, index obj = series_with_simple_index key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0] - if indexer is tm.loc and is_bool_dtype(obj.index): + if indexer is tm.loc and obj.index.inferred_type == "boolean": # passing [False] will get interpreted as a boolean mask # TODO: should it? unambiguous when lengths dont match? return