Skip to content

Commit 42fd7e7

Browse files
ENH: concat of nullable int + bool preserves int dtype (#34985)
1 parent 74f77a1 commit 42fd7e7

File tree

3 files changed

+50
-6
lines changed

3 files changed

+50
-6
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ Other enhancements
322322
- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
323323
- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
324324
combining a nullable integer column with a numpy integer column will no longer
325-
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).
325+
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`, :issue:`34095`).
326326
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
327327
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
328328
- :meth:`DataFrame.cov` and :meth:`Series.cov` now accept a new ``ddof`` parameter for delta degrees of freedom, as in the corresponding numpy methods (:issue:`34611`).

pandas/core/arrays/integer.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,13 @@ def construct_array_type(cls) -> Type["IntegerArray"]:
9292
return IntegerArray
9393

9494
def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
95-
# for now only handle other integer types
95+
# we only handle nullable EA dtypes and numeric or boolean numpy dtypes
9696
if not all(
97-
isinstance(t, _IntegerDtype)
98-
or (isinstance(t, np.dtype) and np.issubdtype(t, np.integer))
97+
isinstance(t, BaseMaskedDtype)
98+
or (
99+
isinstance(t, np.dtype)
100+
and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_))
101+
)
99102
for t in dtypes
100103
):
101104
return None

pandas/tests/arrays/integer/test_concat.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import pytest
23

34
import pandas as pd
@@ -15,12 +16,52 @@
1516
(["Int32", "UInt32"], "Int64"),
1617
# this still gives object (awaiting float extension dtype)
1718
(["Int64", "UInt64"], "object"),
19+
(["Int64", "boolean"], "Int64"),
20+
(["UInt8", "boolean"], "UInt8"),
1821
],
1922
)
2023
def test_concat_series(to_concat_dtypes, result_dtype):
2124

22-
result = pd.concat([pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes])
23-
expected = pd.concat([pd.Series([1, 2, pd.NA], dtype=object)] * 2).astype(
25+
result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes])
26+
expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
2427
result_dtype
2528
)
2629
tm.assert_series_equal(result, expected)
30+
31+
# order doesn't matter for result
32+
result = pd.concat(
33+
[pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes[::-1]]
34+
)
35+
expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
36+
result_dtype
37+
)
38+
tm.assert_series_equal(result, expected)
39+
40+
41+
@pytest.mark.parametrize(
42+
"to_concat_dtypes, result_dtype",
43+
[
44+
(["Int64", "int64"], "Int64"),
45+
(["UInt64", "uint64"], "UInt64"),
46+
(["Int8", "int8"], "Int8"),
47+
(["Int8", "int16"], "Int16"),
48+
(["UInt8", "int8"], "Int16"),
49+
(["Int32", "uint32"], "Int64"),
50+
# this still gives object (awaiting float extension dtype)
51+
(["Int64", "uint64"], "object"),
52+
(["Int64", "bool"], "Int64"),
53+
(["UInt8", "bool"], "UInt8"),
54+
],
55+
)
56+
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
57+
58+
s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0])
59+
s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1]))
60+
result = pd.concat([s1, s2], ignore_index=True)
61+
expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype)
62+
tm.assert_series_equal(result, expected)
63+
64+
# order doesn't matter for result
65+
result = pd.concat([s2, s1], ignore_index=True)
66+
expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype)
67+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)