REF: maybe_promote refactor/cleanup (#28897)

jbrockmendel · jreback · commit 4ade26b4c4c8 · 2019-10-12T13:14:21.000-04:00
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -393,32 +393,29 @@ def maybe_promote(dtype, fill_value=np.nan):
 
     elif is_float(fill_value):
         if issubclass(dtype.type, np.bool_):
-            dtype = np.object_
+            dtype = np.dtype(np.object_)
+
         elif issubclass(dtype.type, np.integer):
             dtype = np.dtype(np.float64)
-            if not isna(fill_value):
-                fill_value = dtype.type(fill_value)
 
         elif dtype.kind == "f":
-            if not np.can_cast(fill_value, dtype):
-                # e.g. dtype is float32, need float64
-                dtype = np.min_scalar_type(fill_value)
+            mst = np.min_scalar_type(fill_value)
+            if mst > dtype:
+                # e.g. mst is np.float64 and dtype is np.float32
+                dtype = mst
 
         elif dtype.kind == "c":
             mst = np.min_scalar_type(fill_value)
             dtype = np.promote_types(dtype, mst)
 
-            if dtype.kind == "c" and not np.isnan(fill_value):
-                fill_value = dtype.type(fill_value)
-
     elif is_bool(fill_value):
         if not issubclass(dtype.type, np.bool_):
-            dtype = np.object_
-        else:
-            fill_value = np.bool_(fill_value)
+            dtype = np.dtype(np.object_)
+
     elif is_integer(fill_value):
         if issubclass(dtype.type, np.bool_):
             dtype = np.dtype(np.object_)
+
         elif issubclass(dtype.type, np.integer):
             if not np.can_cast(fill_value, dtype):
                 # upcast to prevent overflow
@@ -428,35 +425,20 @@ def maybe_promote(dtype, fill_value=np.nan):
                     # Case where we disagree with numpy
                     dtype = np.dtype(np.object_)
 
-            fill_value = dtype.type(fill_value)
-
-        elif issubclass(dtype.type, np.floating):
-            # check if we can cast
-            if _check_lossless_cast(fill_value, dtype):
-                fill_value = dtype.type(fill_value)
-
-        if dtype.kind in ["c", "f"]:
-            # e.g. if dtype is complex128 and fill_value is 1, we
-            #  want np.complex128(1)
-            fill_value = dtype.type(fill_value)
-
     elif is_complex(fill_value):
         if issubclass(dtype.type, np.bool_):
             dtype = np.dtype(np.object_)
+
         elif issubclass(dtype.type, (np.integer, np.floating)):
             mst = np.min_scalar_type(fill_value)
             dtype = np.promote_types(dtype, mst)
 
         elif dtype.kind == "c":
             mst = np.min_scalar_type(fill_value)
-            if mst > dtype and mst.kind == "c":
+            if mst > dtype:
                 # e.g. mst is np.complex128 and dtype is np.complex64
                 dtype = mst
 
-        if dtype.kind == "c":
-            # make sure we have a np.complex and not python complex
-            fill_value = dtype.type(fill_value)
-
     elif fill_value is None:
         if is_float_dtype(dtype) or is_complex_dtype(dtype):
             fill_value = np.nan
@@ -466,37 +448,48 @@ def maybe_promote(dtype, fill_value=np.nan):
         elif is_datetime_or_timedelta_dtype(dtype):
             fill_value = dtype.type("NaT", "ns")
         else:
-            dtype = np.object_
+            dtype = np.dtype(np.object_)
             fill_value = np.nan
     else:
-        dtype = np.object_
+        dtype = np.dtype(np.object_)
 
     # in case we have a string that looked like a number
     if is_extension_array_dtype(dtype):
         pass
     elif issubclass(np.dtype(dtype).type, (bytes, str)):
-        dtype = np.object_
+        dtype = np.dtype(np.object_)
 
+    fill_value = _ensure_dtype_type(fill_value, dtype)
     return dtype, fill_value
 
 
-def _check_lossless_cast(value, dtype: np.dtype) -> bool:
+def _ensure_dtype_type(value, dtype):
     """
-    Check if we can cast the given value to the given dtype _losslesly_.
+    Ensure that the given value is an instance of the given dtype.
+
+    e.g. if our dtype is np.complex64, we should have an instance of that
+    as opposed to a python complex object.
 
     Parameters
     ----------
     value : object
-    dtype : np.dtype
+    dtype : np.dtype or ExtensionDtype
 
     Returns
     -------
-    bool
+    object
     """
-    casted = dtype.type(value)
-    if casted == value:
-        return True
-    return False
+
+    # Start with exceptions in which we do _not_ cast to numpy types
+    if is_extension_array_dtype(dtype):
+        return value
+    elif dtype == np.object_:
+        return value
+    elif isna(value):
+        # e.g. keep np.nan rather than try to cast to np.float32(np.nan)
+        return value
+
+    return dtype.type(value)
 
 
 def infer_dtype_from(val, pandas_dtype=False):