Skip to content

Commit 70b7f04

Browse files
jbrockmendel authored and yehoshuadimarsky committed
DEPR: treating object-dtype blocks as bool_only (pandas-dev#46188)
1 parent 4b92517 commit 70b7f04

File tree

5 files changed

+37
-10
lines changed

5 files changed

+37
-10
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ of columns could result in a larger Series result. See (:issue:`37799`).
354354
*New behavior*:
355355

356356
.. ipython:: python
357+
:okwarning:
357358
358359
In [5]: df.all(bool_only=True)
359360

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ Other Deprecations
280280
- Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`)
281281
- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
282282
- Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`)
283+
- Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; explicitly cast to bool instead (:issue:`46188`)
283284
-
284285

285286
.. ---------------------------------------------------------------------------

pandas/core/dtypes/inference.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
from numbers import Number
55
import re
66
from typing import Pattern
7+
import warnings
78

89
import numpy as np
910

1011
from pandas._libs import lib
1112
from pandas._typing import ArrayLike
13+
from pandas.util._exceptions import find_stack_level
1214

1315
is_bool = lib.is_bool
1416

@@ -447,5 +449,16 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
447449
if dtype == np.dtype(bool):
448450
return True
449451
elif dtype == np.dtype("object"):
450-
return lib.is_bool_array(arr)
452+
result = lib.is_bool_array(arr)
453+
if result:
454+
# GH#46188
455+
warnings.warn(
456+
"In a future version, object-dtype columns with all-bool values "
457+
"will not be included in reductions with bool_only=True. "
458+
"Explicitly cast to bool dtype instead.",
459+
FutureWarning,
460+
stacklevel=find_stack_level(),
461+
)
462+
return result
463+
451464
return False

pandas/tests/frame/test_reductions.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -1219,6 +1219,8 @@ def test_any_all_object(self):
12191219
assert result is False
12201220

12211221
def test_any_all_object_bool_only(self):
1222+
msg = "object-dtype columns with all-bool values"
1223+
12221224
df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
12231225
df._consolidate_inplace()
12241226
df["C"] = Series([True, True])
@@ -1228,29 +1230,36 @@ def test_any_all_object_bool_only(self):
12281230

12291231
# The underlying bug is in DataFrame._get_bool_data, so we check
12301232
# that while we're here
1231-
res = df._get_bool_data()
1233+
with tm.assert_produces_warning(FutureWarning, match=msg):
1234+
res = df._get_bool_data()
12321235
expected = df[["B", "C"]]
12331236
tm.assert_frame_equal(res, expected)
12341237

1235-
res = df.all(bool_only=True, axis=0)
1238+
with tm.assert_produces_warning(FutureWarning, match=msg):
1239+
res = df.all(bool_only=True, axis=0)
12361240
expected = Series([False, True], index=["B", "C"])
12371241
tm.assert_series_equal(res, expected)
12381242

12391243
# operating on a subset of columns should not produce a _larger_ Series
1240-
res = df[["B", "C"]].all(bool_only=True, axis=0)
1244+
with tm.assert_produces_warning(FutureWarning, match=msg):
1245+
res = df[["B", "C"]].all(bool_only=True, axis=0)
12411246
tm.assert_series_equal(res, expected)
12421247

1243-
assert not df.all(bool_only=True, axis=None)
1248+
with tm.assert_produces_warning(FutureWarning, match=msg):
1249+
assert not df.all(bool_only=True, axis=None)
12441250

1245-
res = df.any(bool_only=True, axis=0)
1251+
with tm.assert_produces_warning(FutureWarning, match=msg):
1252+
res = df.any(bool_only=True, axis=0)
12461253
expected = Series([True, True], index=["B", "C"])
12471254
tm.assert_series_equal(res, expected)
12481255

12491256
# operating on a subset of columns should not produce a _larger_ Series
1250-
res = df[["B", "C"]].any(bool_only=True, axis=0)
1257+
with tm.assert_produces_warning(FutureWarning, match=msg):
1258+
res = df[["B", "C"]].any(bool_only=True, axis=0)
12511259
tm.assert_series_equal(res, expected)
12521260

1253-
assert df.any(bool_only=True, axis=None)
1261+
with tm.assert_produces_warning(FutureWarning, match=msg):
1262+
assert df.any(bool_only=True, axis=None)
12541263

12551264
@pytest.mark.parametrize("method", ["any", "all"])
12561265
def test_any_all_level_axis_none_raises(self, method):

pandas/tests/internals/test_internals.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -784,14 +784,16 @@ def test_get_numeric_data(self):
784784
)
785785

786786
def test_get_bool_data(self):
787+
msg = "object-dtype columns with all-bool values"
787788
mgr = create_mgr(
788789
"int: int; float: float; complex: complex;"
789790
"str: object; bool: bool; obj: object; dt: datetime",
790791
item_shape=(3,),
791792
)
792793
mgr.iset(6, np.array([True, False, True], dtype=np.object_))
793794

794-
bools = mgr.get_bool_data()
795+
with tm.assert_produces_warning(FutureWarning, match=msg):
796+
bools = mgr.get_bool_data()
795797
tm.assert_index_equal(bools.items, Index(["bool", "dt"]))
796798
tm.assert_almost_equal(
797799
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
@@ -805,7 +807,8 @@ def test_get_bool_data(self):
805807
)
806808

807809
# Check sharing
808-
bools2 = mgr.get_bool_data(copy=True)
810+
with tm.assert_produces_warning(FutureWarning, match=msg):
811+
bools2 = mgr.get_bool_data(copy=True)
809812
bools2.iset(0, np.array([False, True, False]))
810813
tm.assert_numpy_array_equal(
811814
mgr.iget(mgr.items.get_loc("bool")).internal_values(),

0 commit comments

Comments
 (0)