Skip to content

Commit 4901410

Browse files
authored
ENH: maybe_convert_objects add boolean support with NA (#50047)
* ENH: maybe_convert_objects add boolean support with NA * Fix merge error * Add gh ref * Fix test * Simplify
1 parent 14991df commit 4901410

File tree

3 files changed

+47
-18
lines changed

3 files changed

+47
-18
lines changed

pandas/_libs/lib.pyi

+6-6
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def maybe_convert_objects(
7575
convert_timedelta: Literal[False] = ...,
7676
convert_period: Literal[False] = ...,
7777
convert_interval: Literal[False] = ...,
78-
convert_to_nullable_integer: Literal[False] = ...,
78+
convert_to_nullable_dtype: Literal[False] = ...,
7979
dtype_if_all_nat: DtypeObj | None = ...,
8080
) -> npt.NDArray[np.object_ | np.number]: ...
8181
@overload # both convert_datetime and convert_to_nullable_dtype False -> np.ndarray
@@ -88,7 +88,7 @@ def maybe_convert_objects(
8888
convert_timedelta: bool = ...,
8989
convert_period: Literal[False] = ...,
9090
convert_interval: Literal[False] = ...,
91-
convert_to_nullable_integer: Literal[False] = ...,
91+
convert_to_nullable_dtype: Literal[False] = ...,
9292
dtype_if_all_nat: DtypeObj | None = ...,
9393
) -> np.ndarray: ...
9494
@overload
@@ -101,7 +101,7 @@ def maybe_convert_objects(
101101
convert_timedelta: bool = ...,
102102
convert_period: bool = ...,
103103
convert_interval: bool = ...,
104-
convert_to_nullable_integer: Literal[True] = ...,
104+
convert_to_nullable_dtype: Literal[True] = ...,
105105
dtype_if_all_nat: DtypeObj | None = ...,
106106
) -> ArrayLike: ...
107107
@overload
@@ -114,7 +114,7 @@ def maybe_convert_objects(
114114
convert_timedelta: bool = ...,
115115
convert_period: bool = ...,
116116
convert_interval: bool = ...,
117-
convert_to_nullable_integer: bool = ...,
117+
convert_to_nullable_dtype: bool = ...,
118118
dtype_if_all_nat: DtypeObj | None = ...,
119119
) -> ArrayLike: ...
120120
@overload
@@ -127,7 +127,7 @@ def maybe_convert_objects(
127127
convert_timedelta: bool = ...,
128128
convert_period: Literal[True] = ...,
129129
convert_interval: bool = ...,
130-
convert_to_nullable_integer: bool = ...,
130+
convert_to_nullable_dtype: bool = ...,
131131
dtype_if_all_nat: DtypeObj | None = ...,
132132
) -> ArrayLike: ...
133133
@overload
@@ -140,7 +140,7 @@ def maybe_convert_objects(
140140
convert_timedelta: bool = ...,
141141
convert_period: bool = ...,
142142
convert_interval: bool = ...,
143-
convert_to_nullable_integer: bool = ...,
143+
convert_to_nullable_dtype: bool = ...,
144144
dtype_if_all_nat: DtypeObj | None = ...,
145145
) -> ArrayLike: ...
146146
@overload

pandas/_libs/lib.pyx

+17-10
Original file line numberDiff line numberDiff line change
@@ -1309,10 +1309,14 @@ cdef class Seen:
13091309
@property
13101310
def is_bool(self):
13111311
# i.e. not (anything but bool)
1312-
return not (
1313-
self.datetime_ or self.datetimetz_ or self.timedelta_ or self.nat_
1314-
or self.period_ or self.interval_
1315-
or self.numeric_ or self.nan_ or self.null_ or self.object_
1312+
return self.is_bool_or_na and not (self.nan_ or self.null_)
1313+
1314+
@property
1315+
def is_bool_or_na(self):
1316+
# i.e. not (anything but bool or missing values)
1317+
return self.bool_ and not (
1318+
self.datetime_ or self.datetimetz_ or self.nat_ or self.timedelta_
1319+
or self.period_ or self.interval_ or self.numeric_ or self.object_
13161320
)
13171321

13181322

@@ -2335,7 +2339,7 @@ def maybe_convert_objects(ndarray[object] objects,
23352339
bint convert_timedelta=False,
23362340
bint convert_period=False,
23372341
bint convert_interval=False,
2338-
bint convert_to_nullable_integer=False,
2342+
bint convert_to_nullable_dtype=False,
23392343
object dtype_if_all_nat=None) -> "ArrayLike":
23402344
"""
23412345
Type inference function-- convert object array to proper dtype
@@ -2362,9 +2366,9 @@ def maybe_convert_objects(ndarray[object] objects,
23622366
convert_interval : bool, default False
23632367
If an array-like object contains only Interval objects (with matching
23642368
dtypes and closedness) or NaN, whether to convert to IntervalArray.
2365-
convert_to_nullable_integer : bool, default False
2366-
If an array-like object contains only integer values (and NaN) is
2367-
encountered, whether to convert and return an IntegerArray.
2369+
convert_to_nullable_dtype : bool, default False
2370+
If an array-like object containing only integer or boolean values (and NaN) is
2371+
encountered, whether to convert and return a Boolean/IntegerArray.
23682372
dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None
23692373
Dtype to cast to if we have all-NaT.
23702374

@@ -2446,7 +2450,7 @@ def maybe_convert_objects(ndarray[object] objects,
24462450
seen.int_ = True
24472451
floats[i] = <float64_t>val
24482452
complexes[i] = <double complex>val
2449-
if not seen.null_ or convert_to_nullable_integer:
2453+
if not seen.null_ or convert_to_nullable_dtype:
24502454
seen.saw_int(val)
24512455

24522456
if ((seen.uint_ and seen.sint_) or
@@ -2606,6 +2610,9 @@ def maybe_convert_objects(ndarray[object] objects,
26062610
if seen.is_bool:
26072611
# is_bool property rules out everything else
26082612
return bools.view(np.bool_)
2613+
elif convert_to_nullable_dtype and seen.is_bool_or_na:
2614+
from pandas.core.arrays import BooleanArray
2615+
return BooleanArray(bools.view(np.bool_), mask)
26092616
seen.object_ = True
26102617

26112618
if not seen.object_:
@@ -2617,7 +2624,7 @@ def maybe_convert_objects(ndarray[object] objects,
26172624
elif seen.float_:
26182625
result = floats
26192626
elif seen.int_ or seen.uint_:
2620-
if convert_to_nullable_integer:
2627+
if convert_to_nullable_dtype:
26212628
from pandas.core.arrays import IntegerArray
26222629
if seen.uint_:
26232630
result = IntegerArray(uints, mask)

pandas/tests/dtypes/test_inference.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,7 @@ def test_maybe_convert_objects_timedelta64_nat(self):
859859
def test_maybe_convert_objects_nullable_integer(self, exp):
860860
# GH27335
861861
arr = np.array([2, np.NaN], dtype=object)
862-
result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=True)
862+
result = lib.maybe_convert_objects(arr, convert_to_nullable_dtype=True)
863863

864864
tm.assert_extension_array_equal(result, exp)
865865

@@ -869,7 +869,7 @@ def test_maybe_convert_objects_nullable_integer(self, exp):
869869
def test_maybe_convert_objects_nullable_none(self, dtype, val):
870870
# GH#50043
871871
arr = np.array([val, None, 3], dtype="object")
872-
result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=True)
872+
result = lib.maybe_convert_objects(arr, convert_to_nullable_dtype=True)
873873
expected = IntegerArray(
874874
np.array([val, 0, 3], dtype=dtype), np.array([False, True, False])
875875
)
@@ -930,6 +930,28 @@ def test_maybe_convert_objects_bool_nan(self):
930930
out = lib.maybe_convert_objects(ind.values, safe=1)
931931
tm.assert_numpy_array_equal(out, exp)
932932

933+
def test_maybe_convert_objects_nullable_boolean(self):
934+
# GH50047
935+
arr = np.array([True, False], dtype=object)
936+
exp = np.array([True, False])
937+
out = lib.maybe_convert_objects(arr, convert_to_nullable_dtype=True)
938+
tm.assert_numpy_array_equal(out, exp)
939+
940+
arr = np.array([True, False, pd.NaT], dtype=object)
941+
exp = np.array([True, False, pd.NaT], dtype=object)
942+
out = lib.maybe_convert_objects(arr, convert_to_nullable_dtype=True)
943+
tm.assert_numpy_array_equal(out, exp)
944+
945+
@pytest.mark.parametrize("val", [None, np.nan])
946+
def test_maybe_convert_objects_nullable_boolean_na(self, val):
947+
# GH50047
948+
arr = np.array([True, False, val], dtype=object)
949+
exp = BooleanArray(
950+
np.array([True, False, False]), np.array([False, False, True])
951+
)
952+
out = lib.maybe_convert_objects(arr, convert_to_nullable_dtype=True)
953+
tm.assert_extension_array_equal(out, exp)
954+
933955
@pytest.mark.parametrize(
934956
"data0",
935957
[

0 commit comments

Comments
 (0)