pandas-dev · jreback · May 5, 2021 · Apr 13, 2021 · Apr 18, 2021 · Apr 18, 2021
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
@@ -75,6 +75,8 @@ def maybe_convert_numeric(
     na_values: set,
     convert_empty: bool = True,
     coerce_numeric: bool = False,
+    convert_to_nullable_integer: bool = False,
+    convert_to_floating_array: bool = False
 ) -> np.ndarray: ...
 
 # TODO: restrict `arr`?

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -2007,7 +2007,9 @@ def maybe_convert_numeric(
     set na_values,
     bint convert_empty=True,
     bint coerce_numeric=False,
-) -> ndarray:
+    bint convert_to_nullable_integer=False,
+    bint convert_to_floating_array=False,
+) -> "ArrayLike":
     """
     Convert object array to a numeric array if possible.
 
@@ -2031,10 +2033,15 @@ def maybe_convert_numeric(
         numeric array has no suitable numerical dtype to return (i.e. uint64,
         int32, uint8). If set to False, the original object array will be
         returned. Otherwise, a ValueError will be raised.
-
+    convert_to_nullable_integer : bool, default False
+        If the array contains only integers and missing values, whether to
+        return an IntegerArray instead of a float array.
+    convert_to_floating_array : bool, default False
+        Whether to return a FloatingArray if a float result has missing values.
+
     Returns
     -------
-    np.ndarray
+    np.ndarray or ExtensionArray
         Array of converted object values to numerical ones.
     """
     if len(values) == 0:
@@ -2062,21 +2069,39 @@ def maybe_convert_numeric(
         ndarray[int64_t] ints = np.empty(n, dtype='i8')
         ndarray[uint64_t] uints = np.empty(n, dtype='u8')
         ndarray[uint8_t] bools = np.empty(n, dtype='u1')
+        ndarray[uint8_t] mask = np.zeros(n, dtype="u1")
         float64_t fval
 
     for i in range(n):
         val = values[i]
+        # Nulls are kept from marking the result as float only if
+        # a) convert_to_nullable_integer is True, and
+        # b) no floats have been seen so far (assuming an int shows up later).
+        # If no ints show up at all (all-null array), floats are returned instead
+        allow_null_in_int = convert_to_nullable_integer and not seen.float_
 
         if val.__hash__ is not None and val in na_values:
-            seen.saw_null()
+            if allow_null_in_int:
+                seen.null_ = True
+                mask[i] = 1
+            else:
+                if convert_to_floating_array:
+                    mask[i] = 1
+                seen.saw_null()
             floats[i] = complexes[i] = NaN
         elif util.is_float_object(val):
             fval = val
             if fval != fval:
                 seen.null_ = True
-
+                if allow_null_in_int:
+                    mask[i] = 1
+                else:
+                    if convert_to_floating_array:
+                        mask[i] = 1
+                    seen.float_ = True
+            else:
+                seen.float_ = True
             floats[i] = complexes[i] = fval
-            seen.float_ = True
         elif util.is_integer_object(val):
             floats[i] = complexes[i] = val
 
@@ -2099,7 +2124,13 @@ def maybe_convert_numeric(
             floats[i] = uints[i] = ints[i] = bools[i] = val
             seen.bool_ = True
         elif val is None or val is C_NA:
-            seen.saw_null()
+            if allow_null_in_int:
+                seen.null_ = True
+                mask[i] = 1
+            else:
+                if convert_to_floating_array:
+                    mask[i] = 1
+                seen.saw_null()
             floats[i] = complexes[i] = NaN
         elif hasattr(val, '__len__') and len(val) == 0:
             if convert_empty or seen.coerce_numeric:
@@ -2123,14 +2154,18 @@ def maybe_convert_numeric(
                 else:
                     if fval != fval:
                         seen.null_ = True
+                        mask[i] = 1
 
                     floats[i] = fval
 
                 if maybe_int:
                     as_int = int(val)
 
                     if as_int in na_values:
-                        seen.saw_null()
+                        mask[i] = 1
+                        seen.null_ = True
+                        if not convert_to_nullable_integer:
+                            seen.float_ = True
                     else:
                         seen.saw_int(as_int)
 
@@ -2160,11 +2195,25 @@ def maybe_convert_numeric(
     if seen.check_uint64_conflict():
         return values
 
+    # Nulls were not flagged as float in anticipation of ints that never
+    # appeared (e.g. an all-null array), so fall back to returning floats
+    if convert_to_nullable_integer and seen.null_ and not seen.int_:
+        seen.float_ = True
+
     if seen.complex_:
         return complexes
     elif seen.float_:
+        if seen.null_ and convert_to_floating_array:
+            from pandas.core.arrays import FloatingArray
+            return FloatingArray(floats, mask.view(np.bool_))
         return floats
     elif seen.int_:
+        if seen.null_ and convert_to_nullable_integer:
+            from pandas.core.arrays import IntegerArray
+            if seen.uint_:
+                return IntegerArray(uints, mask.view(np.bool_))
+            else:
+                return IntegerArray(ints, mask.view(np.bool_))
         if seen.uint_:
             return uints
         else:

diff --git a/pandas/_libs/ops.pyi b/pandas/_libs/ops.pyi
@@ -39,5 +39,6 @@ def vec_binop(
 def maybe_convert_bool(
     arr: np.ndarray,  # np.ndarray[object]
     true_values=...,
-    false_values=...
+    false_values=...,
+    convert_to_nullable_boolean: bool = False,
 ) -> np.ndarray: ...
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
@@ -24,10 +24,7 @@ import_array()
 
 
 from pandas._libs.missing cimport checknull
-from pandas._libs.util cimport (
-    UINT8_MAX,
-    is_nan,
-)
+from pandas._libs.util cimport is_nan
 
 
 @cython.wraparound(False)
@@ -258,17 +255,20 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
 
 
 def maybe_convert_bool(ndarray[object] arr,
-                       true_values=None, false_values=None) -> ndarray:
+                       true_values=None,
+                       false_values=None,
+                       convert_to_nullable_boolean=False) -> "ArrayLike":
     cdef:
         Py_ssize_t i, n
         ndarray[uint8_t] result
+        ndarray[uint8_t] mask
         object val
         set true_vals, false_vals
-        int na_count = 0
+        bint has_na = False
 
     n = len(arr)
     result = np.empty(n, dtype=np.uint8)
-
+    mask = np.zeros(n, dtype=np.uint8)
     # the defaults
     true_vals = {'True', 'TRUE', 'true'}
     false_vals = {'False', 'FALSE', 'false'}
@@ -292,15 +292,19 @@ def maybe_convert_bool(ndarray[object] arr,
         elif val in false_vals:
             result[i] = 0
         elif isinstance(val, float):
-            result[i] = UINT8_MAX
-            na_count += 1
+            mask[i] = 1
+            result[i] = 0  # Value here doesn't matter, will be replaced w/ nan
+            has_na = True
         else:
             return arr
 
-    if na_count > 0:
-        mask = result == UINT8_MAX
-        arr = result.view(np.bool_).astype(object)
-        np.putmask(arr, mask, np.nan)
-        return arr
+    if has_na:
+        if convert_to_nullable_boolean:
+            from pandas.core.arrays import BooleanArray
+            return BooleanArray(result.view(np.bool_), mask.view(np.bool_))
+        else:
+            arr = result.view(np.bool_).astype(object)
+            np.putmask(arr, mask, np.nan)
+            return arr
     else:
         return result.view(np.bool_)
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -24,6 +24,7 @@
 from pandas._libs import (
     lib,
     missing as libmissing,
+    ops as libops,
 )
 import pandas.util._test_decorators as td
 
@@ -60,7 +61,11 @@
     Timestamp,
 )
 import pandas._testing as tm
-from pandas.core.arrays import IntegerArray
+from pandas.core.arrays import (
+    BooleanArray,
+    FloatingArray,
+    IntegerArray,
+)
 
 
 @pytest.fixture(params=[True, False], ids=str)
@@ -415,6 +420,29 @@ def test_isneginf_scalar(self, value, expected):
         result = libmissing.isneginf_scalar(value)
         assert result is expected
 
+    @pytest.mark.parametrize(
+        "convert_to_nullable_boolean, exp",
+        [
+            (
+                True,
+                BooleanArray(
+                    np.array([True, False], dtype="bool"), np.array([False, True])
+                ),
+            ),
+            (False, np.array([True, np.nan], dtype="object")),
+        ],
+    )
+    def test_maybe_convert_nullable_boolean(self, convert_to_nullable_boolean, exp):
+        # GH 40687: NaN is masked in a BooleanArray only when the flag is set
+        arr = np.array([True, np.nan], dtype=object)
+        result = libops.maybe_convert_bool(
+            arr, set(), convert_to_nullable_boolean=convert_to_nullable_boolean
+        )
+        if convert_to_nullable_boolean:
+            tm.assert_extension_array_equal(result, exp)
+        else:
+            tm.assert_numpy_array_equal(result, exp)
+
     @pytest.mark.parametrize("coerce_numeric", [True, False])
     @pytest.mark.parametrize(
         "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"]
@@ -607,6 +635,42 @@ def test_maybe_convert_objects_nullable_integer(self, exp):
 
         tm.assert_extension_array_equal(result, exp)
 
+    @pytest.mark.parametrize(
+        "exp",
+        [
+            IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])),
+            IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])),
+        ],
+    )
+    def test_maybe_convert_numeric_nullable_integer(self, exp):
+        # GH 40687: an int plus NaN converts to a masked IntegerArray
+        arr = np.array([2, np.nan], dtype=object)
+        result = lib.maybe_convert_numeric(arr, set(), convert_to_nullable_integer=True)
+        tm.assert_extension_array_equal(result, exp)
+
+    @pytest.mark.parametrize(
+        "convert_to_floating_array, exp",
+        [
+            (
+                True,
+                FloatingArray(
+                    np.array([2.0, 0.0], dtype="float64"), np.array([False, True])
+                ),
+            ),
+            (False, np.array([2.0, np.nan])),
+        ],
+    )
+    def test_maybe_convert_numeric_floating_array(self, convert_to_floating_array, exp):
+        # GH 40687: int plus NaN gives a FloatingArray only when the flag is set
+        arr = np.array([2, np.nan], dtype=object)
+        result = lib.maybe_convert_numeric(
+            arr, set(), convert_to_floating_array=convert_to_floating_array
+        )
+        if convert_to_floating_array:
+            tm.assert_extension_array_equal(result, exp)
+        else:
+            tm.assert_numpy_array_equal(result, exp)
+
     def test_maybe_convert_objects_bool_nan(self):
         # GH32146
         ind = Index([True, False, np.nan], dtype=object)