Skip to content

Commit 663147e

Browse files
authored
BUG: Nullable _get_common_dtype match non-nullable behavior (#46379)
* BUG: Nullable _get_common_dtype match non-nullable behavior * Whatsnew
1 parent 49937b9 commit 663147e

File tree

6 files changed

+28
-67
lines changed

6 files changed

+28
-67
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ Reshaping
463463
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
464464
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
465465
- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
466+
- Bug in concatenation with ``IntegerDtype`` or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
466467
-
467468

468469
Sparse

pandas/core/arrays/boolean.py

-12
Original file line numberDiff line numberDiff line change
@@ -137,18 +137,6 @@ def __from_arrow__(
137137
else:
138138
return BooleanArray._concat_same_type(results)
139139

140-
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
141-
# Handle only boolean + np.bool_ -> boolean, since other cases like
142-
# Int64 + boolean -> Int64 will be handled by the other type
143-
if all(
144-
isinstance(t, BooleanDtype)
145-
or (isinstance(t, np.dtype) and (np.issubdtype(t, np.bool_)))
146-
for t in dtypes
147-
):
148-
return BooleanDtype()
149-
else:
150-
return None
151-
152140

153141
def coerce_to_array(
154142
values, mask=None, copy: bool = False

pandas/core/arrays/floating.py

-16
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import numpy as np
44

5-
from pandas._typing import DtypeObj
6-
75
from pandas.core.dtypes.common import is_float_dtype
86
from pandas.core.dtypes.dtypes import register_extension_dtype
97

@@ -37,20 +35,6 @@ def construct_array_type(cls) -> type[FloatingArray]:
3735
"""
3836
return FloatingArray
3937

40-
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
41-
# for now only handle other floating types
42-
if not all(isinstance(t, FloatingDtype) for t in dtypes):
43-
return None
44-
np_dtype = np.find_common_type(
45-
# error: Item "ExtensionDtype" of "Union[Any, ExtensionDtype]" has no
46-
# attribute "numpy_dtype"
47-
[t.numpy_dtype for t in dtypes], # type: ignore[union-attr]
48-
[],
49-
)
50-
if np.issubdtype(np_dtype, np.floating):
51-
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
52-
return None
53-
5438
@classmethod
5539
def _str_to_dtype_mapping(cls):
5640
return FLOAT_STR_TO_DTYPE

pandas/core/arrays/integer.py

-35
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,9 @@
22

33
import numpy as np
44

5-
from pandas._typing import DtypeObj
6-
75
from pandas.core.dtypes.base import register_extension_dtype
86
from pandas.core.dtypes.common import is_integer_dtype
97

10-
from pandas.core.arrays.masked import BaseMaskedDtype
118
from pandas.core.arrays.numeric import (
129
NumericArray,
1310
NumericDtype,
@@ -38,38 +35,6 @@ def construct_array_type(cls) -> type[IntegerArray]:
3835
"""
3936
return IntegerArray
4037

41-
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
42-
# we only handle nullable EA dtypes and numeric numpy dtypes
43-
if not all(
44-
isinstance(t, BaseMaskedDtype)
45-
or (
46-
isinstance(t, np.dtype)
47-
and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_))
48-
)
49-
for t in dtypes
50-
):
51-
return None
52-
np_dtype = np.find_common_type(
53-
# error: List comprehension has incompatible type List[Union[Any,
54-
# dtype, ExtensionDtype]]; expected List[Union[dtype, None, type,
55-
# _SupportsDtype, str, Tuple[Any, Union[int, Sequence[int]]],
56-
# List[Any], _DtypeDict, Tuple[Any, Any]]]
57-
[
58-
t.numpy_dtype # type: ignore[misc]
59-
if isinstance(t, BaseMaskedDtype)
60-
else t
61-
for t in dtypes
62-
],
63-
[],
64-
)
65-
if np.issubdtype(np_dtype, np.integer):
66-
return INT_STR_TO_DTYPE[str(np_dtype)]
67-
elif np.issubdtype(np_dtype, np.floating):
68-
from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
69-
70-
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
71-
return None
72-
7338
@classmethod
7439
def _str_to_dtype_mapping(cls):
7540
return INT_STR_TO_DTYPE

pandas/core/dtypes/dtypes.py

+19
Original file line numberDiff line numberDiff line change
@@ -1437,3 +1437,22 @@ def from_numpy_dtype(cls, dtype: np.dtype) -> BaseMaskedDtype:
14371437
return FLOAT_STR_TO_DTYPE[dtype.name]
14381438
else:
14391439
raise NotImplementedError(dtype)
1440+
1441+
def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
1442+
# We unwrap any masked dtypes, find the common dtype we would use
1443+
# for that, then re-mask the result.
1444+
from pandas.core.dtypes.cast import find_common_type
1445+
1446+
new_dtype = find_common_type(
1447+
[
1448+
dtype.numpy_dtype if isinstance(dtype, BaseMaskedDtype) else dtype
1449+
for dtype in dtypes
1450+
]
1451+
)
1452+
if not isinstance(new_dtype, np.dtype):
1453+
# If we ever support e.g. Masked[DatetimeArray] then this will change
1454+
return None
1455+
try:
1456+
return type(self).from_numpy_dtype(new_dtype)
1457+
except (KeyError, NotImplementedError):
1458+
return None

pandas/tests/arrays/integer/test_concat.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
(["UInt8", "Int8"], "Int16"),
1616
(["Int32", "UInt32"], "Int64"),
1717
(["Int64", "UInt64"], "Float64"),
18-
(["Int64", "boolean"], "Int64"),
19-
(["UInt8", "boolean"], "UInt8"),
18+
(["Int64", "boolean"], "object"),
19+
(["UInt8", "boolean"], "object"),
2020
],
2121
)
2222
def test_concat_series(to_concat_dtypes, result_dtype):
23+
# we expect the same dtypes as we would get with non-masked inputs,
24+
# just masked where available.
2325

2426
result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes])
2527
expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
@@ -47,11 +49,13 @@ def test_concat_series(to_concat_dtypes, result_dtype):
4749
(["UInt8", "int8"], "Int16"),
4850
(["Int32", "uint32"], "Int64"),
4951
(["Int64", "uint64"], "Float64"),
50-
(["Int64", "bool"], "Int64"),
51-
(["UInt8", "bool"], "UInt8"),
52+
(["Int64", "bool"], "object"),
53+
(["UInt8", "bool"], "object"),
5254
],
5355
)
5456
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
57+
# we expect the same dtypes as we would get with non-masked inputs,
58+
# just masked where available.
5559

5660
s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0])
5761
s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1]))

0 commit comments

Comments
 (0)