Skip to content

Commit db5c087

Browse files
authored
BUG: categorical-with-nas to float64 instead of object (#45359)
1 parent d2ef7e9 commit db5c087

File tree

4 files changed

+16
-18
lines changed

4 files changed

+16
-18
lines changed

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,7 @@ Groupby/resample/rolling
236236

237237
Reshaping
238238
^^^^^^^^^
239-
-
239+
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` that has integer categories and contains ``NaN`` values, where the result was cast to object dtype instead of ``float64`` (:issue:`45359`)
240240
-
241241

242242
Sparse

pandas/core/dtypes/concat.py

+9-11
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
)
3131

3232
if TYPE_CHECKING:
33+
from pandas import Categorical
3334
from pandas.core.arrays.sparse import SparseArray
3435

3536

@@ -40,17 +41,14 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
4041
"""
4142
if is_dtype_equal(arr.dtype, dtype):
4243
return arr
43-
if (
44-
is_categorical_dtype(arr.dtype)
45-
and isinstance(dtype, np.dtype)
46-
and np.issubdtype(dtype, np.integer)
47-
):
48-
# problem case: categorical of int -> gives int as result dtype,
49-
# but categorical can contain NAs -> fall back to object dtype
50-
try:
51-
return arr.astype(dtype, copy=False)
52-
except ValueError:
53-
return arr.astype(object, copy=False)
44+
45+
if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]:
46+
47+
if is_categorical_dtype(arr.dtype) and cast("Categorical", arr)._hasnans:
48+
# problem case: categorical of int -> gives int as result dtype,
49+
# but categorical can contain NAs -> float64 instead
50+
# GH#45359
51+
dtype = np.dtype(np.float64)
5452

5553
if is_sparse(arr) and not is_sparse(dtype):
5654
# problem case: SparseArray.astype(dtype) doesn't follow the specified

pandas/core/indexes/category.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -576,7 +576,7 @@ def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
576576
# not all to_concat elements are among our categories (or NA)
577577
from pandas.core.dtypes.concat import concat_compat
578578

579-
res = concat_compat(to_concat)
579+
res = concat_compat([x._values for x in to_concat])
580580
return Index(res, name=name)
581581
else:
582582
cat = self._data._from_backing_data(codes)

pandas/tests/reshape/concat/test_append_common.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -508,7 +508,7 @@ def test_concat_categorical(self):
508508
s1 = Series([10, 11, np.nan], dtype="category")
509509
s2 = Series([np.nan, 1, 3, 2], dtype="category")
510510

511-
exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object")
511+
exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64)
512512
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
513513
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
514514

@@ -529,12 +529,12 @@ def test_concat_categorical_coercion(self):
529529
s1 = Series([1, 2, np.nan], dtype="category")
530530
s2 = Series([2, 1, 2])
531531

532-
exp = Series([1, 2, np.nan, 2, 1, 2], dtype="object")
532+
exp = Series([1, 2, np.nan, 2, 1, 2], dtype=np.float64)
533533
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
534534
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
535535

536536
# result shouldn't be affected by 1st elem dtype
537-
exp = Series([2, 1, 2, 1, 2, np.nan], dtype="object")
537+
exp = Series([2, 1, 2, 1, 2, np.nan], dtype=np.float64)
538538
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
539539
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
540540

@@ -554,11 +554,11 @@ def test_concat_categorical_coercion(self):
554554
s1 = Series([10, 11, np.nan], dtype="category")
555555
s2 = Series([1, 3, 2])
556556

557-
exp = Series([10, 11, np.nan, 1, 3, 2], dtype="object")
557+
exp = Series([10, 11, np.nan, 1, 3, 2], dtype=np.float64)
558558
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
559559
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
560560

561-
exp = Series([1, 3, 2, 10, 11, np.nan], dtype="object")
561+
exp = Series([1, 3, 2, 10, 11, np.nan], dtype=np.float64)
562562
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
563563
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
564564

0 commit comments

Comments
 (0)