Skip to content

Commit de08242

Browse files
TomAugspurger authored and tm9k1 committed
BUG: Concat multiple different ExtensionArray types (pandas-dev#22997)
1 parent eae0c2c commit de08242

File tree

5 files changed

+24
-12
lines changed

5 files changed

+24
-12
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -627,6 +627,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
627627
- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`).
628628
- Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
629629
- Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)
630+
- Bug in :func:`concat` where concatenating multiple ``Series`` with different extension dtypes did not cast the result to object dtype (:issue:`22994`)
630631
- Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`)
631632
- Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
632633
- :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).

pandas/core/dtypes/concat.py

-5
Original file line number | Diff line number | Diff line change
@@ -560,11 +560,6 @@ def _concat_sparse(to_concat, axis=0, typs=None):
560560

561561
fill_values = [x.fill_value for x in to_concat
562562
if isinstance(x, SparseArray)]
563-
564-
if len(set(fill_values)) > 1:
565-
raise ValueError("Cannot concatenate SparseArrays with different "
566-
"fill values")
567-
568563
fill_value = fill_values[0]
569564

570565
# TODO: Fix join unit generation so we aren't passed this.

pandas/core/internals/managers.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1636,8 +1636,7 @@ def concat(self, to_concat, new_axis):
16361636
# check if all series are of the same block type:
16371637
if len(non_empties) > 0:
16381638
blocks = [obj.blocks[0] for obj in non_empties]
1639-
1640-
if all(type(b) is type(blocks[0]) for b in blocks[1:]): # noqa
1639+
if len({b.dtype for b in blocks}) == 1:
16411640
new_block = blocks[0].concat_same_type(blocks)
16421641
else:
16431642
values = [x.values for x in blocks]

pandas/tests/extension/decimal/test_decimal.py

+7-3
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,9 @@ def test_hashable(self, dtype):
100100

101101

102102
class TestInterface(BaseDecimal, base.BaseInterfaceTests):
103-
pass
103+
104+
pytestmark = pytest.mark.skipif(compat.PY2,
105+
reason="Unhashble dtype in Py2.")
104106

105107

106108
class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
@@ -112,7 +114,8 @@ def test_from_dtype(self, data):
112114

113115

114116
class TestReshaping(BaseDecimal, base.BaseReshapingTests):
115-
pass
117+
pytestmark = pytest.mark.skipif(compat.PY2,
118+
reason="Unhashble dtype in Py2.")
116119

117120

118121
class TestGetitem(BaseDecimal, base.BaseGetitemTests):
@@ -174,7 +177,8 @@ class TestCasting(BaseDecimal, base.BaseCastingTests):
174177

175178

176179
class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
177-
pass
180+
pytestmark = pytest.mark.skipif(compat.PY2,
181+
reason="Unhashble dtype in Py2.")
178182

179183

180184
class TestSetitem(BaseDecimal, base.BaseSetitemTests):

pandas/tests/reshape/test_concat.py

+15-2
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,25 @@
11
from warnings import catch_warnings, simplefilter
22
from itertools import combinations
33
from collections import deque
4+
from decimal import Decimal
45

56
import datetime as dt
67
import dateutil
78
import numpy as np
89
from numpy.random import randn
910

1011
from datetime import datetime
11-
from pandas.compat import StringIO, iteritems, PY2
12+
from pandas.compat import Iterable, StringIO, iteritems, PY2
1213
import pandas as pd
1314
from pandas import (DataFrame, concat,
1415
read_csv, isna, Series, date_range,
1516
Index, Panel, MultiIndex, Timestamp,
1617
DatetimeIndex, Categorical)
17-
from pandas.compat import Iterable
1818
from pandas.core.dtypes.dtypes import CategoricalDtype
1919
from pandas.util import testing as tm
2020
from pandas.util.testing import (assert_frame_equal,
2121
makeCustomDataframe as mkdf)
22+
from pandas.tests.extension.decimal import to_decimal
2223

2324
import pytest
2425

@@ -2361,6 +2362,18 @@ def test_concat_datetime_timezone(self):
23612362
index=idx1.append(idx1))
23622363
tm.assert_frame_equal(result, expected)
23632364

2365+
@pytest.mark.skipif(PY2, reason="Unhashable Decimal dtype")
2366+
def test_concat_different_extension_dtypes_upcasts(self):
2367+
a = pd.Series(pd.core.arrays.integer_array([1, 2]))
2368+
b = pd.Series(to_decimal([1, 2]))
2369+
2370+
result = pd.concat([a, b], ignore_index=True)
2371+
expected = pd.Series([
2372+
1, 2,
2373+
Decimal(1), Decimal(2)
2374+
], dtype=object)
2375+
tm.assert_series_equal(result, expected)
2376+
23642377

23652378
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
23662379
@pytest.mark.parametrize('dt', np.sctypes['float'])

0 commit comments

Comments
 (0)