Skip to content

Commit 045880c

Browse files
dsaxton authored and jreback committed
BUG: Don't cast categorical nan to int (#28438)
1 parent 367670e commit 045880c

File tree

4 files changed

+18
-6
lines changed

4 files changed

+18
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ Categorical
122122
^^^^^^^^^^^
123123

124124
- Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`)
125+
- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
125126
-
126127
-
127128

pandas/core/arrays/categorical.py

+3
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
520520
if dtype == self.dtype:
521521
return self
522522
return self._set_dtype(dtype)
523+
if is_integer_dtype(dtype) and self.isna().any():
524+
msg = "Cannot convert float NaN to integer"
525+
raise ValueError(msg)
523526
return np.array(self, dtype=dtype, copy=copy)
524527

525528
@cache_readonly

pandas/core/indexes/base.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -4715,13 +4715,13 @@ def set_value(self, arr, key, value):
47154715
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
47164716
def get_indexer_non_unique(self, target):
47174717
target = ensure_index(target)
4718-
if is_categorical(target):
4719-
target = target.astype(target.dtype.categories.dtype)
47204718
pself, ptarget = self._maybe_promote(target)
47214719
if pself is not self or ptarget is not target:
47224720
return pself.get_indexer_non_unique(ptarget)
47234721

4724-
if self.is_all_dates:
4722+
if is_categorical(target):
4723+
tgt_values = np.asarray(target)
4724+
elif self.is_all_dates:
47254725
tgt_values = target.asi8
47264726
else:
47274727
tgt_values = target._ndarray_values
@@ -4733,7 +4733,7 @@ def get_indexer_for(self, target, **kwargs):
47334733
"""
47344734
Guaranteed return of an indexer even when non-unique.
47354735
4736-
This dispatches to get_indexer or get_indexer_nonunique
4736+
This dispatches to get_indexer or get_indexer_non_unique
47374737
as appropriate.
47384738
47394739
Returns

pandas/tests/extension/test_categorical.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import pytest
2020

2121
import pandas as pd
22-
from pandas import Categorical
22+
from pandas import Categorical, CategoricalIndex, Timestamp
2323
from pandas.api.types import CategoricalDtype
2424
from pandas.tests.extension import base
2525
import pandas.util.testing as tm
@@ -197,7 +197,15 @@ def test_searchsorted(self, data_for_sorting):
197197

198198

199199
class TestCasting(base.BaseCastingTests):
200-
pass
200+
@pytest.mark.parametrize("cls", [Categorical, CategoricalIndex])
201+
@pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), pd.NaT]])
202+
def test_cast_nan_to_int(self, cls, values):
203+
# GH 28406
204+
s = cls(values)
205+
206+
msg = "Cannot (cast|convert)"
207+
with pytest.raises((ValueError, TypeError), match=msg):
208+
s.astype(int)
201209

202210

203211
class TestArithmeticOps(base.BaseArithmeticOpsTests):

0 commit comments

Comments
 (0)