CLN: remove pandas_dtype kwd from infer_dtype_from_x (pandas-dev#53064)

jbrockmendel · im-vinicius · commit d6c63c59b362 · 2023-07-08T12:28:03.000+02:00
* REF: avoid object dtype in mask_missing

* REF: remove pandas_dtype kwarg from infer_dtype_from

* REF: remove pandas_dtype kwarg from infer_dtype_from_scalar

* mypy fixup
diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py
@@ -136,7 +136,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
     other : Any
     """
     if values.dtype == object:
-        dtype, _ = infer_dtype_from(other, pandas_dtype=True)
+        dtype, _ = infer_dtype_from(other)
 
         if isinstance(dtype, np.dtype) and dtype.kind in "mM":
             # https://github.com/numpy/numpy/issues/12550
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -631,7 +631,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
 
     # returns tuple of (dtype, fill_value)
     if issubclass(dtype.type, np.datetime64):
-        inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
+        inferred, fv = infer_dtype_from_scalar(fill_value)
         if inferred == dtype:
             return dtype, fv
 
@@ -645,7 +645,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
             return _dtype_obj, fill_value
 
     elif issubclass(dtype.type, np.timedelta64):
-        inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
+        inferred, fv = infer_dtype_from_scalar(fill_value)
         if inferred == dtype:
             return dtype, fv
 
@@ -735,33 +735,26 @@ def _ensure_dtype_type(value, dtype: np.dtype):
     return dtype.type(value)
 
 
-def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
+def infer_dtype_from(val) -> tuple[DtypeObj, Any]:
     """
     Interpret the dtype from a scalar or array.
 
     Parameters
     ----------
     val : object
-    pandas_dtype : bool, default False
-        whether to infer dtype including pandas extension types.
-        If False, scalar/array belongs to pandas extension types is inferred as
-        object
     """
     if not is_list_like(val):
-        return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype)
-    return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
+        return infer_dtype_from_scalar(val)
+    return infer_dtype_from_array(val)
 
 
-def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
+def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
     """
     Interpret the dtype from a scalar.
 
     Parameters
     ----------
-    pandas_dtype : bool, default False
-        whether to infer dtype including pandas extension types.
-        If False, scalar belongs to pandas extension types is inferred as
-        object
+    val : object
     """
     dtype: DtypeObj = _dtype_obj
 
@@ -796,11 +789,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
             dtype = val.dtype
             # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
         else:
-            if pandas_dtype:
-                dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
-            else:
-                # return datetimetz as object
-                return _dtype_obj, val
+            dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
 
     elif isinstance(val, (np.timedelta64, dt.timedelta)):
         try:
@@ -834,12 +823,11 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
     elif is_complex(val):
         dtype = np.dtype(np.complex_)
 
-    elif pandas_dtype:
-        if lib.is_period(val):
-            dtype = PeriodDtype(freq=val.freq)
-        elif lib.is_interval(val):
-            subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0]
-            dtype = IntervalDtype(subtype=subtype, closed=val.closed)
+    if lib.is_period(val):
+        dtype = PeriodDtype(freq=val.freq)
+    elif lib.is_interval(val):
+        subtype = infer_dtype_from_scalar(val.left)[0]
+        dtype = IntervalDtype(subtype=subtype, closed=val.closed)
 
     return dtype, val
 
@@ -859,32 +847,18 @@ def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
     return {maybe_box_datetimelike(key): value for key, value in d.items()}
 
 
-def infer_dtype_from_array(
-    arr, pandas_dtype: bool = False
-) -> tuple[DtypeObj, ArrayLike]:
+def infer_dtype_from_array(arr) -> tuple[DtypeObj, ArrayLike]:
     """
     Infer the dtype from an array.
 
     Parameters
     ----------
     arr : array
-    pandas_dtype : bool, default False
-        whether to infer dtype including pandas extension types.
-        If False, array belongs to pandas extension types
-        is inferred as object
 
     Returns
     -------
-    tuple (numpy-compat/pandas-compat dtype, array)
-
-    Notes
-    -----
-    if pandas_dtype=False. these infer to numpy dtypes
-    exactly with the exception that mixed / object dtypes
-    are not coerced by stringifying or conversion
+    tuple (pandas-compat dtype, array)
 
-    if pandas_dtype=True. datetime64tz-aware/categorical
-    types will retain there character.
 
     Examples
     --------
@@ -901,7 +875,7 @@ def infer_dtype_from_array(
         raise TypeError("'arr' must be list-like")
 
     arr_dtype = getattr(arr, "dtype", None)
-    if pandas_dtype and isinstance(arr_dtype, ExtensionDtype):
+    if isinstance(arr_dtype, ExtensionDtype):
         return arr.dtype, arr
 
     elif isinstance(arr, ABCSeries):
@@ -1303,7 +1277,7 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
         new_dtype = ensure_dtype_can_hold_na(left.dtype)
 
     else:
-        dtype, _ = infer_dtype_from(right, pandas_dtype=True)
+        dtype, _ = infer_dtype_from(right)
 
         new_dtype = find_common_type([left.dtype, dtype])
 
@@ -1466,7 +1440,7 @@ def construct_1d_arraylike_from_scalar(
 
     if dtype is None:
         try:
-            dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True)
+            dtype, value = infer_dtype_from_scalar(value)
         except OutOfBoundsDatetime:
             dtype = _dtype_obj
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -824,7 +824,7 @@ def __init__(
             columns = ensure_index(columns)
 
             if not dtype:
-                dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)
+                dtype, _ = infer_dtype_from_scalar(data)
 
             # For data is a scalar extension dtype
             if isinstance(dtype, ExtensionDtype):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -6097,7 +6097,7 @@ def _find_common_type_compat(self, target) -> DtypeObj:
         Implementation of find_common_type that adjusts for Index-specific
         special cases.
         """
-        target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)
+        target_dtype, _ = infer_dtype_from(target)
 
         # special case: if one dtype is uint64 and the other a signed int, return object
         # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -558,7 +558,7 @@ def _maybe_convert_i8(self, key):
 
         if scalar:
             # Timestamp/Timedelta
-            key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
+            key_dtype, key_i8 = infer_dtype_from_scalar(key)
             if lib.is_period(key):
                 key_i8 = key.ordinal
             elif isinstance(key_i8, Timestamp):
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -84,6 +84,7 @@
     new_block,
     to_native_types,
 )
+from pandas.core.internals.managers import make_na_array
 
 if TYPE_CHECKING:
     from pandas._typing import (
@@ -665,13 +666,8 @@ def _make_na_array(self, fill_value=None, use_na_proxy: bool = False):
             fill_value = np.nan
 
         dtype, fill_value = infer_dtype_from_scalar(fill_value)
-        # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any],
-        # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
-        # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
-        # _DTypeDict, Tuple[Any, Any]]]"
-        values = np.empty(self.shape_proper[0], dtype=dtype)  # type: ignore[arg-type]
-        values.fill(fill_value)
-        return values
+        array_values = make_na_array(dtype, self.shape_proper[:1], fill_value)
+        return array_values
 
     def _equal_values(self, other) -> bool:
         """
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -921,7 +921,7 @@ def _make_na_block(
 
         shape = (len(placement), self.shape[1])
 
-        dtype, fill_value = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
+        dtype, fill_value = infer_dtype_from_scalar(fill_value)
         block_values = make_na_array(dtype, shape, fill_value)
         return new_block_2d(block_values, placement=placement)
 
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -80,11 +80,14 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
     #  known to be holdable by arr.
     # When called from Series._single_replace, values_to_mask is tuple or list
     dtype, values_to_mask = infer_dtype_from(values_to_mask)
-    # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any],
-    # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
-    # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
-    # _DTypeDict, Tuple[Any, Any]]]"
-    values_to_mask = np.array(values_to_mask, dtype=dtype)  # type: ignore[arg-type]
+
+    if isinstance(dtype, np.dtype):
+        values_to_mask = np.array(values_to_mask, dtype=dtype)
+    else:
+        cls = dtype.construct_array_type()
+        if not lib.is_list_like(values_to_mask):
+            values_to_mask = [values_to_mask]
+        values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)
 
     potential_na = False
     if is_object_dtype(arr.dtype):
diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py
@@ -25,11 +25,6 @@
 )
 
 
-@pytest.fixture(params=[True, False])
-def pandas_dtype(request):
-    return request.param
-
-
 def test_infer_dtype_from_int_scalar(any_int_numpy_dtype):
     # Test that infer_dtype_from_scalar is
     # returning correct dtype for int and float.
@@ -81,36 +76,32 @@ def test_infer_dtype_from_timedelta(data):
 
 
 @pytest.mark.parametrize("freq", ["M", "D"])
-def test_infer_dtype_from_period(freq, pandas_dtype):
+def test_infer_dtype_from_period(freq):
     p = Period("2011-01-01", freq=freq)
-    dtype, val = infer_dtype_from_scalar(p, pandas_dtype=pandas_dtype)
+    dtype, val = infer_dtype_from_scalar(p)
 
-    if pandas_dtype:
-        exp_dtype = f"period[{freq}]"
-    else:
-        exp_dtype = np.object_
+    exp_dtype = f"period[{freq}]"
 
     assert dtype == exp_dtype
     assert val == p
 
 
-@pytest.mark.parametrize(
-    "data", [date(2000, 1, 1), "foo", Timestamp(1, tz="US/Eastern")]
-)
-def test_infer_dtype_misc(data):
-    dtype, val = infer_dtype_from_scalar(data)
+def test_infer_dtype_misc():
+    dt = date(2000, 1, 1)
+    dtype, val = infer_dtype_from_scalar(dt)
     assert dtype == np.object_
 
+    ts = Timestamp(1, tz="US/Eastern")
+    dtype, val = infer_dtype_from_scalar(ts)
+    assert dtype == "datetime64[ns, US/Eastern]"
+
 
 @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"])
-def test_infer_from_scalar_tz(tz, pandas_dtype):
+def test_infer_from_scalar_tz(tz):
     dt = Timestamp(1, tz=tz)
-    dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=pandas_dtype)
+    dtype, val = infer_dtype_from_scalar(dt)
 
-    if pandas_dtype:
-        exp_dtype = f"datetime64[ns, {tz}]"
-    else:
-        exp_dtype = np.object_
+    exp_dtype = f"datetime64[ns, {tz}]"
 
     assert dtype == exp_dtype
     assert val == dt
@@ -126,11 +117,11 @@ def test_infer_from_scalar_tz(tz, pandas_dtype):
         (Timedelta(0), Timedelta(1), "timedelta64[ns]"),
     ],
 )
-def test_infer_from_interval(left, right, subtype, closed, pandas_dtype):
+def test_infer_from_interval(left, right, subtype, closed):
     # GH 30337
     interval = Interval(left, right, closed)
-    result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype)
-    expected_dtype = f"interval[{subtype}, {closed}]" if pandas_dtype else np.object_
+    result_dtype, result_value = infer_dtype_from_scalar(interval)
+    expected_dtype = f"interval[{subtype}, {closed}]"
     assert result_dtype == expected_dtype
     assert result_value == interval
 
@@ -143,54 +134,49 @@ def test_infer_dtype_from_scalar_errors():
 
 
 @pytest.mark.parametrize(
-    "value, expected, pandas_dtype",
+    "value, expected",
     [
-        ("foo", np.object_, False),
-        (b"foo", np.object_, False),
-        (1, np.int64, False),
-        (1.5, np.float_, False),
-        (np.datetime64("2016-01-01"), np.dtype("M8[ns]"), False),
-        (Timestamp("20160101"), np.dtype("M8[ns]"), False),
-        (Timestamp("20160101", tz="UTC"), np.object_, False),
-        (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]", True),
+        ("foo", np.object_),
+        (b"foo", np.object_),
+        (1, np.int64),
+        (1.5, np.float_),
+        (np.datetime64("2016-01-01"), np.dtype("M8[ns]")),
+        (Timestamp("20160101"), np.dtype("M8[ns]")),
+        (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]"),
     ],
 )
-def test_infer_dtype_from_scalar(value, expected, pandas_dtype):
-    dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=pandas_dtype)
+def test_infer_dtype_from_scalar(value, expected):
+    dtype, _ = infer_dtype_from_scalar(value)
     assert is_dtype_equal(dtype, expected)
 
     with pytest.raises(TypeError, match="must be list-like"):
-        infer_dtype_from_array(value, pandas_dtype=pandas_dtype)
+        infer_dtype_from_array(value)
 
 
 @pytest.mark.parametrize(
-    "arr, expected, pandas_dtype",
+    "arr, expected",
     [
-        ([1], np.int_, False),
-        (np.array([1], dtype=np.int64), np.int64, False),
-        ([np.nan, 1, ""], np.object_, False),
-        (np.array([[1.0, 2.0]]), np.float_, False),
-        (Categorical(list("aabc")), np.object_, False),
-        (Categorical([1, 2, 3]), np.int64, False),
-        (Categorical(list("aabc")), "category", True),
-        (Categorical([1, 2, 3]), "category", True),
-        (date_range("20160101", periods=3), np.dtype("=M8[ns]"), False),
+        ([1], np.int_),
+        (np.array([1], dtype=np.int64), np.int64),
+        ([np.nan, 1, ""], np.object_),
+        (np.array([[1.0, 2.0]]), np.float_),
+        (Categorical(list("aabc")), "category"),
+        (Categorical([1, 2, 3]), "category"),
+        (date_range("20160101", periods=3), np.dtype("=M8[ns]")),
         (
             date_range("20160101", periods=3, tz="US/Eastern"),
             "datetime64[ns, US/Eastern]",
-            True,
         ),
-        (Series([1.0, 2, 3]), np.float64, False),
-        (Series(list("abc")), np.object_, False),
+        (Series([1.0, 2, 3]), np.float64),
+        (Series(list("abc")), np.object_),
         (
             Series(date_range("20160101", periods=3, tz="US/Eastern")),
             "datetime64[ns, US/Eastern]",
-            True,
         ),
     ],
 )
-def test_infer_dtype_from_array(arr, expected, pandas_dtype):
-    dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype)
+def test_infer_dtype_from_array(arr, expected):
+    dtype, _ = infer_dtype_from_array(arr)
     assert is_dtype_equal(dtype, expected)