Skip to content

Commit d2ef7e9

Browse files
authored
BUG: CategoricalIndex.union with nans (#45362)
1 parent 94f976f commit d2ef7e9

File tree

4 files changed

+58
-11
lines changed

4 files changed

+58
-11
lines changed

doc/source/whatsnew/v1.5.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ Bug fixes
158158

159159
Categorical
160160
^^^^^^^^^^^
161-
-
161+
- Bug in :meth:`CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`)
162162
-
163163

164164
Datetimelike

pandas/core/dtypes/cast.py

+41
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
)
8282
from pandas.core.dtypes.generic import (
8383
ABCExtensionArray,
84+
ABCIndex,
8485
ABCSeries,
8586
)
8687
from pandas.core.dtypes.inference import is_list_like
@@ -93,7 +94,9 @@
9394

9495
if TYPE_CHECKING:
9596

97+
from pandas import Index
9698
from pandas.core.arrays import (
99+
Categorical,
97100
DatetimeArray,
98101
ExtensionArray,
99102
IntervalArray,
@@ -1470,6 +1473,44 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
14701473
return new_dtype
14711474

14721475

1476+
def common_dtype_categorical_compat(
1477+
objs: list[Index | ArrayLike], dtype: DtypeObj
1478+
) -> DtypeObj:
1479+
"""
1480+
Update the result of find_common_type to account for NAs in a Categorical.
1481+
1482+
Parameters
1483+
----------
1484+
objs : list[np.ndarray | ExtensionArray | Index]
1485+
dtype : np.dtype or ExtensionDtype
1486+
1487+
Returns
1488+
-------
1489+
np.dtype or ExtensionDtype
1490+
"""
1491+
# GH#38240
1492+
1493+
# TODO: more generally, could do `not can_hold_na(dtype)`
1494+
if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]:
1495+
1496+
for obj in objs:
1497+
# We don't want to accientally allow e.g. "categorical" str here
1498+
obj_dtype = getattr(obj, "dtype", None)
1499+
if isinstance(obj_dtype, CategoricalDtype):
1500+
if isinstance(obj, ABCIndex):
1501+
# This check may already be cached
1502+
hasnas = obj.hasnans
1503+
else:
1504+
# Categorical
1505+
hasnas = cast("Categorical", obj)._hasnans
1506+
1507+
if hasnas:
1508+
# see test_union_int_categorical_with_nan
1509+
dtype = np.dtype(np.float64)
1510+
break
1511+
return dtype
1512+
1513+
14731514
@overload
14741515
def find_common_type(types: list[np.dtype]) -> np.dtype:
14751516
...

pandas/core/indexes/base.py

+2-10
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666

6767
from pandas.core.dtypes.cast import (
6868
can_hold_element,
69+
common_dtype_categorical_compat,
6970
find_common_type,
7071
infer_dtype_from,
7172
maybe_cast_pointwise_result,
@@ -6017,17 +6018,8 @@ def _find_common_type_compat(self, target) -> DtypeObj:
60176018
return _dtype_obj
60186019

60196020
dtype = find_common_type([self.dtype, target_dtype])
6021+
dtype = common_dtype_categorical_compat([self, target], dtype)
60206022

6021-
if dtype.kind in ["i", "u"]:
6022-
# TODO: what about reversed with self being categorical?
6023-
if (
6024-
isinstance(target, Index)
6025-
and is_categorical_dtype(target.dtype)
6026-
and target.hasnans
6027-
):
6028-
# FIXME: find_common_type incorrect with Categorical GH#38240
6029-
# FIXME: some cases where float64 cast can be lossy?
6030-
dtype = np.dtype(np.float64)
60316023
if dtype.kind == "c":
60326024
dtype = _dtype_obj
60336025
return dtype

pandas/tests/indexes/test_setops.py

+14
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,20 @@ def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls):
592592
tm.assert_index_equal(result, expected)
593593

594594

595+
def test_union_int_categorical_with_nan():
    # A CategoricalIndex whose categories are integers but which also
    # holds a missing value.
    cat_index = CategoricalIndex([1, 2, np.nan])
    assert cat_index.categories.dtype.kind == "i"

    int_index = Index([1, 2])
    expected = Index([1, 2, np.nan], dtype=np.float64)

    # The union must produce the same float64 Index regardless of which
    # operand the method is called on.
    for left, right in ((int_index, cat_index), (cat_index, int_index)):
        result = left.union(right)
        tm.assert_index_equal(result, expected)
607+
608+
595609
class TestSetOpsUnsorted:
596610
# These may eventually belong in a dtype-specific test_setops, or
597611
# parametrized over a more general fixture

0 commit comments

Comments
 (0)