Skip to content

Commit b149e17

Browse files
authored
BUG: concat coercing arrow to object with null type (#53702)
* BUG: concat coercing arrow to object with null type
* BUG: concat coercing arrow to object with null type
1 parent 13d968b commit b149e17

File tree

3 files changed

+13
-0
lines changed

3 files changed

+13
-0
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,7 @@ Groupby/resample/rolling
479479

480480
Reshaping
481481
^^^^^^^^^
482+
- Bug in :func:`concat` coercing to ``object`` dtype when one of the columns has :class:`ArrowDtype` with ``pa.null()`` type (:issue:`53702`)
482483
- Bug in :func:`crosstab` when ``dropna=False`` would not keep ``np.nan`` in the result (:issue:`10772`)
483484
- Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`)
484485
- Bug in :func:`merge_asof` raising ``ValueError`` for data backed by read-only ndarrays (:issue:`53513`)

pandas/core/dtypes/dtypes.py

+3
Original file line numberDiff line numberDiff line change
@@ -2214,10 +2214,13 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
22142214
# Mirrors BaseMaskedDtype
22152215
from pandas.core.dtypes.cast import find_common_type
22162216

2217+
null_dtype = type(self)(pa.null())
2218+
22172219
new_dtype = find_common_type(
22182220
[
22192221
dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype
22202222
for dtype in dtypes
2223+
if dtype != null_dtype
22212224
]
22222225
)
22232226
if not isinstance(new_dtype, np.dtype):

pandas/tests/extension/test_arrow.py

+9
Original file line numberDiff line numberDiff line change
@@ -2874,6 +2874,15 @@ def test_conversion_large_dtypes_from_numpy_array(data, arrow_dtype):
28742874
tm.assert_extension_array_equal(result, expected)
28752875

28762876

2877+
def test_concat_null_array():
2878+
df = pd.DataFrame({"a": [None, None]}, dtype=ArrowDtype(pa.null()))
2879+
df2 = pd.DataFrame({"a": [0, 1]}, dtype="int64[pyarrow]")
2880+
2881+
result = pd.concat([df, df2], ignore_index=True)
2882+
expected = pd.DataFrame({"a": [None, None, 0, 1]}, dtype="int64[pyarrow]")
2883+
tm.assert_frame_equal(result, expected)
2884+
2885+
28772886
@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
28782887
def test_describe_numeric_data(pa_type):
28792888
# GH 52470

0 commit comments

Comments
 (0)