diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9a6455d4d012f..acdac0949ee34 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -191,7 +191,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`) - Sparse diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 83c2668242129..40f5af9d0c0dd 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -30,6 +30,7 @@ ) if TYPE_CHECKING: + from pandas import Categorical from pandas.core.arrays.sparse import SparseArray @@ -40,17 +41,14 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: """ if is_dtype_equal(arr.dtype, dtype): return arr - if ( - is_categorical_dtype(arr.dtype) - and isinstance(dtype, np.dtype) - and np.issubdtype(dtype, np.integer) - ): - # problem case: categorical of int -> gives int as result dtype, - # but categorical can contain NAs -> fall back to object dtype - try: - return arr.astype(dtype, copy=False) - except ValueError: - return arr.astype(object, copy=False) + + if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]: + + if is_categorical_dtype(arr.dtype) and cast("Categorical", arr)._hasnans: + # problem case: categorical of int -> gives int as result dtype, + # but categorical can contain NAs -> float64 instead + # GH#45359 + dtype = np.dtype(np.float64) if is_sparse(arr) and not is_sparse(dtype): # problem case: SparseArray.astype(dtype) doesn't follow the specified diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index a04605071a862..c2bcd90ff10fb 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -576,7 +576,7 @@ def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # not all to_concat elements are among our categories (or NA) from pandas.core.dtypes.concat import concat_compat - res = concat_compat(to_concat) + res = concat_compat([x._values for x in to_concat]) return Index(res, name=name) else: cat = self._data._from_backing_data(codes) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index f36bee9dd8dec..36bca1c2b654e 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -508,7 +508,7 @@ def test_concat_categorical(self): s1 = Series([10, 11, np.nan], dtype="category") s2 = Series([np.nan, 1, 3, 2], dtype="category") - exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object") + exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) @@ -529,12 +529,12 @@ def test_concat_categorical_coercion(self): s1 = Series([1, 2, np.nan], dtype="category") s2 = Series([2, 1, 2]) - exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object") + exp = Series([1, 2, np.nan, 2, 1, 2], dtype=np.float64) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) # result shouldn't be affected by 1st elem dtype - exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object") + exp = Series([2, 1, 2, 1, 2, np.nan], dtype=np.float64) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) @@ -554,11 +554,11 @@ def test_concat_categorical_coercion(self): s1 = Series([10, 11, np.nan], dtype="category") s2 = Series([1, 3, 2]) - exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object") + exp = Series([10, 11, np.nan, 1, 3, 2], dtype=np.float64) tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) - exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object") + exp = Series([1, 3, 2, 10, 11, np.nan], dtype=np.float64) tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)