diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index dde098be2e5ae..3d7e7686b2db6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -602,6 +602,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185:`). - Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`) +- Fixed a bug where concatenating multiple ``Series`` with different extension dtypes did not cast the result to object dtype (:issue:`22994`) - Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`) - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ac824708245d2..2b1778e5bcb2e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -560,11 +560,6 @@ def _concat_sparse(to_concat, axis=0, typs=None): fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)] - - if len(set(fill_values)) > 1: - raise ValueError("Cannot concatenate SparseArrays with different " - "fill values") - fill_value = fill_values[0] # TODO: Fix join unit generation so we aren't passed this. 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3667d7c5e39dc..2646dbd33815d 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1636,8 +1636,7 @@ def concat(self, to_concat, new_axis): # check if all series are of the same block type: if len(non_empties) > 0: blocks = [obj.blocks[0] for obj in non_empties] - - if all(type(b) is type(blocks[0]) for b in blocks[1:]): # noqa + if len({b.dtype for b in blocks}) == 1: new_block = blocks[0].concat_same_type(blocks) else: values = [x.values for x in blocks] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index f84d24295b049..be1c61166e4b1 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -100,7 +100,9 @@ def test_hashable(self, dtype): class TestInterface(BaseDecimal, base.BaseInterfaceTests): - pass + + pytestmark = pytest.mark.skipif(compat.PY2, + reason="Unhashble dtype in Py2.") class TestConstructors(BaseDecimal, base.BaseConstructorsTests): @@ -112,7 +114,8 @@ def test_from_dtype(self, data): class TestReshaping(BaseDecimal, base.BaseReshapingTests): - pass + pytestmark = pytest.mark.skipif(compat.PY2, + reason="Unhashble dtype in Py2.") class TestGetitem(BaseDecimal, base.BaseGetitemTests): @@ -174,7 +177,8 @@ class TestCasting(BaseDecimal, base.BaseCastingTests): class TestGroupby(BaseDecimal, base.BaseGroupbyTests): - pass + pytestmark = pytest.mark.skipif(compat.PY2, + reason="Unhashble dtype in Py2.") class TestSetitem(BaseDecimal, base.BaseSetitemTests): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 2aaa04d571e69..d39c9fafe5749 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,6 +1,7 @@ from warnings import catch_warnings, simplefilter from itertools import combinations from collections import deque +from decimal 
import Decimal import datetime as dt import dateutil @@ -8,17 +9,17 @@ from numpy.random import randn from datetime import datetime -from pandas.compat import StringIO, iteritems, PY2 +from pandas.compat import Iterable, StringIO, iteritems, PY2 import pandas as pd from pandas import (DataFrame, concat, read_csv, isna, Series, date_range, Index, Panel, MultiIndex, Timestamp, DatetimeIndex, Categorical) -from pandas.compat import Iterable from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, makeCustomDataframe as mkdf) +from pandas.tests.extension.decimal import to_decimal import pytest @@ -2361,6 +2362,18 @@ def test_concat_datetime_timezone(self): index=idx1.append(idx1)) tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(PY2, reason="Unhashable Decimal dtype") + def test_concat_different_extension_dtypes_upcasts(self): + a = pd.Series(pd.core.arrays.integer_array([1, 2])) + b = pd.Series(to_decimal([1, 2])) + + result = pd.concat([a, b], ignore_index=True) + expected = pd.Series([ + 1, 2, + Decimal(1), Decimal(2) + ], dtype=object) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float'])