BUG: Nullable _get_common_dtype match non-nullable behavior (#46379)

jbrockmendel · web-flow · commit 663147edd35b · 2022-03-16T19:03:30.000-07:00
* BUG: Nullable _get_common_dtype match non-nullable behavior

* Whatsnew
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -463,6 +463,7 @@ Reshaping
 - Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
 - Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
 - Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
+- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
 -
 
 Sparse
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -137,18 +137,6 @@ def __from_arrow__(
         else:
             return BooleanArray._concat_same_type(results)
 
-    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
-        # Handle only boolean + np.bool_ -> boolean, since other cases like
-        # Int64 + boolean -> Int64 will be handled by the other type
-        if all(
-            isinstance(t, BooleanDtype)
-            or (isinstance(t, np.dtype) and (np.issubdtype(t, np.bool_)))
-            for t in dtypes
-        ):
-            return BooleanDtype()
-        else:
-            return None
-
 
 def coerce_to_array(
     values, mask=None, copy: bool = False
diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -2,8 +2,6 @@
 
 import numpy as np
 
-from pandas._typing import DtypeObj
-
 from pandas.core.dtypes.common import is_float_dtype
 from pandas.core.dtypes.dtypes import register_extension_dtype
 
@@ -37,20 +35,6 @@ def construct_array_type(cls) -> type[FloatingArray]:
         """
         return FloatingArray
 
-    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
-        # for now only handle other floating types
-        if not all(isinstance(t, FloatingDtype) for t in dtypes):
-            return None
-        np_dtype = np.find_common_type(
-            # error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]" has no
-            # attribute "numpy_dtype"
-            [t.numpy_dtype for t in dtypes],  # type: ignore[union-attr]
-            [],
-        )
-        if np.issubdtype(np_dtype, np.floating):
-            return FLOAT_STR_TO_DTYPE[str(np_dtype)]
-        return None
-
     @classmethod
     def _str_to_dtype_mapping(cls):
         return FLOAT_STR_TO_DTYPE
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -2,12 +2,9 @@
 
 import numpy as np
 
-from pandas._typing import DtypeObj
-
 from pandas.core.dtypes.base import register_extension_dtype
 from pandas.core.dtypes.common import is_integer_dtype
 
-from pandas.core.arrays.masked import BaseMaskedDtype
 from pandas.core.arrays.numeric import (
     NumericArray,
     NumericDtype,
@@ -38,38 +35,6 @@ def construct_array_type(cls) -> type[IntegerArray]:
         """
         return IntegerArray
 
-    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
-        # we only handle nullable EA dtypes and numeric numpy dtypes
-        if not all(
-            isinstance(t, BaseMaskedDtype)
-            or (
-                isinstance(t, np.dtype)
-                and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_))
-            )
-            for t in dtypes
-        ):
-            return None
-        np_dtype = np.find_common_type(
-            # error: List comprehension has incompatible type List[Union[Any,
-            # dtype, ExtensionDtype]]; expected List[Union[dtype, None, type,
-            # _SupportsDtype, str, Tuple[Any, Union[int, Sequence[int]]],
-            # List[Any], _DtypeDict, Tuple[Any, Any]]]
-            [
-                t.numpy_dtype  # type: ignore[misc]
-                if isinstance(t, BaseMaskedDtype)
-                else t
-                for t in dtypes
-            ],
-            [],
-        )
-        if np.issubdtype(np_dtype, np.integer):
-            return INT_STR_TO_DTYPE[str(np_dtype)]
-        elif np.issubdtype(np_dtype, np.floating):
-            from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
-
-            return FLOAT_STR_TO_DTYPE[str(np_dtype)]
-        return None
-
     @classmethod
     def _str_to_dtype_mapping(cls):
         return INT_STR_TO_DTYPE
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -1437,3 +1437,22 @@ def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype:
             return FLOAT_STR_TO_DTYPE[dtype.name]
         else:
             raise NotImplementedError(dtype)
+
+    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
+        # We unwrap any masked dtypes, find the common dtype we would use
+        #  for that, then re-mask the result.
+        from pandas.core.dtypes.cast import find_common_type
+
+        new_dtype = find_common_type(
+            [
+                dtype.numpy_dtype if isinstance(dtype, BaseMaskedDtype) else dtype
+                for dtype in dtypes
+            ]
+        )
+        if not isinstance(new_dtype, np.dtype):
+            # If we ever support e.g. Masked[DatetimeArray] then this will change
+            return None
+        try:
+            return type(self).from_numpy_dtype(new_dtype)
+        except (KeyError, NotImplementedError):
+            return None
diff --git a/pandas/tests/arrays/integer/test_concat.py b/pandas/tests/arrays/integer/test_concat.py
@@ -15,11 +15,13 @@
         (["UInt8", "Int8"], "Int16"),
         (["Int32", "UInt32"], "Int64"),
         (["Int64", "UInt64"], "Float64"),
-        (["Int64", "boolean"], "Int64"),
-        (["UInt8", "boolean"], "UInt8"),
+        (["Int64", "boolean"], "object"),
+        (["UInt8", "boolean"], "object"),
     ],
 )
 def test_concat_series(to_concat_dtypes, result_dtype):
+    # we expect the same dtypes as we would get with non-masked inputs,
+    #  just masked where available.
 
     result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes])
     expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
@@ -47,11 +49,13 @@ def test_concat_series(to_concat_dtypes, result_dtype):
         (["UInt8", "int8"], "Int16"),
         (["Int32", "uint32"], "Int64"),
         (["Int64", "uint64"], "Float64"),
-        (["Int64", "bool"], "Int64"),
-        (["UInt8", "bool"], "UInt8"),
+        (["Int64", "bool"], "object"),
+        (["UInt8", "bool"], "object"),
     ],
 )
 def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
+    # we expect the same dtypes as we would get with non-masked inputs,
+    #  just masked where available.
 
     s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0])
     s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1]))

Original file line number	Diff line number	Diff line change
`@@ -463,6 +463,7 @@ Reshaping`
`463`	`463`	- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
`464`	`464`	- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
`465`	`465`	- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
	`466`	+- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
`466`	`467`	`-`
`467`	`468`
`468`	`469`	`Sparse`