Skip to content

Commit 647f3f0

Browse files
miker985jreback
authored andcommitted
Fix categorical from codes nan 21767 (#21775)
1 parent a8836f3 commit 647f3f0

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

doc/source/whatsnew/v0.24.0.txt

+2-3
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ Deprecations
477477
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
478478
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
479479
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
480+
- Passing float values for the ``codes`` argument of :meth:`Categorical.from_codes` is deprecated (:issue:`21767`)
480481

481482
.. _whatsnew_0240.prior_deprecations:
482483

@@ -524,9 +525,7 @@ Bug Fixes
524525
Categorical
525526
^^^^^^^^^^^
526527

527-
-
528-
-
529-
-
528+
- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). ``NaN`` values and non-integer float codes, e.g. ``.from_codes([1.1, 2.0])``, now raise a ``ValueError``. Integer-valued float codes emit a ``FutureWarning`` and will raise a ``ValueError`` in the future.
530529

531530
Datetimelike
532531
^^^^^^^^^^^^

pandas/core/arrays/categorical.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
is_timedelta64_dtype,
2828
is_categorical,
2929
is_categorical_dtype,
30+
is_float_dtype,
31+
is_integer_dtype,
3032
is_list_like, is_sequence,
3133
is_scalar, is_iterator,
3234
is_dict_like)
@@ -633,8 +635,21 @@ def from_codes(cls, codes, categories, ordered=False):
633635
categorical. If not given, the resulting categorical will be
634636
unordered.
635637
"""
638+
codes = np.asarray(codes) # #21767
639+
if not is_integer_dtype(codes):
640+
msg = "codes need to be array-like integers"
641+
if is_float_dtype(codes):
642+
icodes = codes.astype('i8')
643+
if (icodes == codes).all():
644+
msg = None
645+
codes = icodes
646+
warn(("float codes will be disallowed in the future and "
647+
"raise a ValueError"), FutureWarning, stacklevel=2)
648+
if msg:
649+
raise ValueError(msg)
650+
636651
try:
637-
codes = coerce_indexer_dtype(np.asarray(codes), categories)
652+
codes = coerce_indexer_dtype(codes, categories)
638653
except (ValueError, TypeError):
639654
raise ValueError(
640655
"codes need to be convertible to an arrays of integers")

pandas/tests/arrays/categorical/test_constructors.py

+20
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,26 @@ def test_from_codes_with_categorical_categories(self):
468468
with pytest.raises(ValueError):
469469
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
470470

471+
def test_from_codes_with_nan_code(self):
472+
# GH21767
473+
codes = [1, 2, np.nan]
474+
categories = ['a', 'b', 'c']
475+
with pytest.raises(ValueError):
476+
Categorical.from_codes(codes, categories)
477+
478+
def test_from_codes_with_float(self):
479+
# GH21767
480+
codes = [1.0, 2.0, 0] # integer, but in float dtype
481+
categories = ['a', 'b', 'c']
482+
483+
with tm.assert_produces_warning(FutureWarning):
484+
cat = Categorical.from_codes(codes, categories)
485+
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
486+
487+
codes = [1.1, 2.0, 0] # non-integer
488+
with pytest.raises(ValueError):
489+
Categorical.from_codes(codes, categories)
490+
471491
@pytest.mark.parametrize('dtype', [None, 'category'])
472492
def test_from_inferred_categories(self, dtype):
473493
cats = ['a', 'b']

0 commit comments

Comments
 (0)