From 355ffd7372b144071447ddb60cf285730dd4f25e Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Fri, 6 Jul 2018 10:01:49 -0700 Subject: [PATCH 01/11] Add NaN check to Categorical.from_codes --- pandas/core/arrays/categorical.py | 2 ++ pandas/tests/arrays/categorical/test_constructors.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 204e800b932a9..9748b69b1d351 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -629,6 +629,8 @@ def from_codes(cls, codes, categories, ordered=False): categorical. If not given, the resulting categorical will be unordered. """ + if isna(codes).any(): + raise ValueError("nan is not a valid code. Use -1") try: codes = coerce_indexer_dtype(np.asarray(codes), categories) except (ValueError, TypeError): diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index e082629a5433d..212305827ed8f 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -468,6 +468,14 @@ def test_from_codes_with_categorical_categories(self): with pytest.raises(ValueError): Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a'])) + def test_from_codes_with_nan_code(self): + # GH21767 + codes = [1, 2, np.nan] + categories = ['a', 'b', 'c'] + with pytest.raises(ValueError, + match='nan is not a valid code. 
Use -1'): + Categorical.from_codes(codes, categories) + @pytest.mark.parametrize('dtype', [None, 'category']) def test_from_inferred_categories(self, dtype): cats = ['a', 'b'] From acb00dbd29846b342effb4408a1594d6265da67c Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Fri, 6 Jul 2018 10:05:59 -0700 Subject: [PATCH 02/11] Add whatsnew entry --- doc/source/whatsnew/v0.24.0.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d2d5d40393b62..2cbe72c0393c7 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -486,9 +486,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- -- -- +- Bug in :meth:`Categorical.from_codes` where `NaN` values in `codes` were silently converted to `0` (:issue:`21767`) Datetimelike ^^^^^^^^^^^^ From c625ebb345ff1c70b4301c3979a06e2cdda3c27b Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Fri, 6 Jul 2018 10:31:21 -0700 Subject: [PATCH 03/11] Use pandas is_integer_dtype --- pandas/core/arrays/categorical.py | 8 +++++--- pandas/tests/arrays/categorical/test_constructors.py | 3 +-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9748b69b1d351..d72fc8df73ff2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -27,6 +27,7 @@ is_timedelta64_dtype, is_categorical, is_categorical_dtype, + is_integer_dtype, is_list_like, is_sequence, is_scalar, is_iterator, is_dict_like) @@ -629,10 +630,11 @@ def from_codes(cls, codes, categories, ordered=False): categorical. If not given, the resulting categorical will be unordered. """ - if isna(codes).any(): - raise ValueError("nan is not a valid code. 
Use -1") + codes = np.asarray(codes) + if not is_integer_dtype(codes): + raise ValueError("codes need to be array-like integers") try: - codes = coerce_indexer_dtype(np.asarray(codes), categories) + codes = coerce_indexer_dtype(codes, categories) except (ValueError, TypeError): raise ValueError( "codes need to be convertible to an arrays of integers") diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 212305827ed8f..9aab30bcb1259 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -472,8 +472,7 @@ def test_from_codes_with_nan_code(self): # GH21767 codes = [1, 2, np.nan] categories = ['a', 'b', 'c'] - with pytest.raises(ValueError, - match='nan is not a valid code. Use -1'): + with pytest.raises(ValueError): Categorical.from_codes(codes, categories) @pytest.mark.parametrize('dtype', [None, 'category']) From 93659e877ea405a872ac2b3c4f30db9171b617e0 Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Mon, 9 Jul 2018 08:00:03 -0700 Subject: [PATCH 04/11] Warn for float dtype --- pandas/core/arrays/categorical.py | 15 ++++++++++++++- .../tests/arrays/categorical/test_constructors.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d72fc8df73ff2..988a71d8f9c78 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -27,6 +27,7 @@ is_timedelta64_dtype, is_categorical, is_categorical_dtype, + is_float_dtype, is_integer_dtype, is_list_like, is_sequence, is_scalar, is_iterator, @@ -632,7 +633,19 @@ def from_codes(cls, codes, categories, ordered=False): """ codes = np.asarray(codes) if not is_integer_dtype(codes): - raise ValueError("codes need to be array-like integers") + err = True + if is_float_dtype(codes): + icodes = codes.astype('i8') + if (icodes == codes).all(): + err = False + 
codes = icodes + warn("float codes will be disallowed in the future", + FutureWarning) + else: + err = True + if err: + raise ValueError("codes need to be array-like integers") + try: codes = coerce_indexer_dtype(codes, categories) except (ValueError, TypeError): diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 9aab30bcb1259..810fe9ea838ab 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -2,6 +2,7 @@ import pytest from datetime import datetime +import warnings import numpy as np @@ -475,6 +476,15 @@ def test_from_codes_with_nan_code(self): with pytest.raises(ValueError): Categorical.from_codes(codes, categories) + def test_from_codes_with_float(self): + # GH21767 + codes = [1.0, 2.0, 0] + categories = ['a', 'b', 'c'] + with warnings.catch_warnings(record=True) as w: + Categorical.from_codes(codes, categories) + + assert len(w) == 1 + @pytest.mark.parametrize('dtype', [None, 'category']) def test_from_inferred_categories(self, dtype): cats = ['a', 'b'] From a880024f46a0560c0e35e4d7992df477757a4177 Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Mon, 9 Jul 2018 15:08:39 -0700 Subject: [PATCH 05/11] Address two comments from jreback --- doc/source/whatsnew/v0.24.0.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2cbe72c0393c7..e4b918f595952 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -439,6 +439,7 @@ Deprecations - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) - :meth:`Series.compress` is deprecated. 
Use ``Series[condition]`` instead (:issue:`18262`) +- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. .. _whatsnew_0240.prior_deprecations: @@ -486,7 +487,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- Bug in :meth:`Categorical.from_codes` where `NaN` values in `codes` were silently converted to `0` (:issue:`21767`) +- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in `codes` were silently converted to ``0`` (:issue:`21767`) Datetimelike ^^^^^^^^^^^^ From b6e173b690c5d680a53ba1e977e20eea27c392da Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Mon, 9 Jul 2018 15:18:29 -0700 Subject: [PATCH 06/11] Address comments from jreback --- pandas/tests/arrays/categorical/test_constructors.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 810fe9ea838ab..ed51bb2df972e 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -2,7 +2,6 @@ import pytest from datetime import datetime -import warnings import numpy as np @@ -478,12 +477,15 @@ def test_from_codes_with_nan_code(self): def test_from_codes_with_float(self): # GH21767 - codes = [1.0, 2.0, 0] + codes = [1.0, 2.0, 0] # integer, but in float dtype categories = ['a', 'b', 'c'] - with warnings.catch_warnings(record=True) as w: + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): Categorical.from_codes(codes, categories) - assert len(w) == 1 + codes = [1.1, 2.0, 0] # non-integer + with pytest.raises(ValueError): + Categorical.from_codes(codes, categories) @pytest.mark.parametrize('dtype', [None, 'category']) def test_from_inferred_categories(self, dtype): From 69fb0d5914f9c8a7b2feb86fa5daefce5341bf7d Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Thu, 12 Jul 2018 15:23:39 -0700 Subject: [PATCH 07/11] Address stacklevel request from 
Tom --- pandas/core/arrays/categorical.py | 2 +- pandas/tests/arrays/categorical/test_constructors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 988a71d8f9c78..c2cd5b140a8fd 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -640,7 +640,7 @@ def from_codes(cls, codes, categories, ordered=False): err = False codes = icodes warn("float codes will be disallowed in the future", - FutureWarning) + FutureWarning, stacklevel=2) else: err = True if err: diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index ed51bb2df972e..be4a9d97eda3b 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -480,7 +480,7 @@ def test_from_codes_with_float(self): codes = [1.0, 2.0, 0] # integer, but in float dtype categories = ['a', 'b', 'c'] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): Categorical.from_codes(codes, categories) codes = [1.1, 2.0, 0] # non-integer From bab4c76d8e4c7e18793a952bf5a60f51ee9eecbc Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Thu, 12 Jul 2018 15:29:10 -0700 Subject: [PATCH 08/11] Address comments from Tom --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e4b918f595952..c4a3847f38758 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -439,7 +439,7 @@ Deprecations - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) - :meth:`Series.compress` is deprecated. 
Use ``Series[condition]`` instead (:issue:`18262`) -- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. +- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument (:issue:`21767`) .. _whatsnew_0240.prior_deprecations: @@ -487,7 +487,7 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in `codes` were silently converted to ``0`` (:issue:`21767`) +- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``. Datetimelike ^^^^^^^^^^^^ From c41bf0178b2d0fa2792ef7fab58fbad39589230a Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Thu, 12 Jul 2018 15:29:36 -0700 Subject: [PATCH 09/11] Use "msg" not "err" --- pandas/core/arrays/categorical.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c2cd5b140a8fd..2eb317904056c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -633,18 +633,16 @@ def from_codes(cls, codes, categories, ordered=False): """ codes = np.asarray(codes) if not is_integer_dtype(codes): - err = True + msg = "codes need to be array-like integers" if is_float_dtype(codes): icodes = codes.astype('i8') if (icodes == codes).all(): - err = False + msg = None codes = icodes - warn("float codes will be disallowed in the future", - FutureWarning, stacklevel=2) - else: - err = True - if err: - raise ValueError("codes need to be array-like integers") + warn(("float codes will be disallowed in the future and " + "raise a ValueError"), FutureWarning, stacklevel=2) + if msg: + raise ValueError(msg) try: codes = coerce_indexer_dtype(codes, categories) From 3d955d1837f8f009d14a0193acfe22debeb71403 Mon Sep 17 00:00:00 2001 From: 
Mike Richards Date: Mon, 30 Jul 2018 07:55:30 -0700 Subject: [PATCH 10/11] Add issue reference --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2eb317904056c..7c793cb1d6488 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -631,7 +631,7 @@ def from_codes(cls, codes, categories, ordered=False): categorical. If not given, the resulting categorical will be unordered. """ - codes = np.asarray(codes) + codes = np.asarray(codes) # #21767 if not is_integer_dtype(codes): msg = "codes need to be array-like integers" if is_float_dtype(codes): From b8d4b83dc9d0aa2574f8e26a5e9fa7df15783cdd Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Tue, 31 Jul 2018 07:48:41 -0700 Subject: [PATCH 11/11] Test converted floats are correct int values --- pandas/tests/arrays/categorical/test_constructors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index be4a9d97eda3b..c903a8c09195e 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -481,7 +481,8 @@ def test_from_codes_with_float(self): categories = ['a', 'b', 'c'] with tm.assert_produces_warning(FutureWarning): - Categorical.from_codes(codes, categories) + cat = Categorical.from_codes(codes, categories) + tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1')) codes = [1.1, 2.0, 0] # non-integer with pytest.raises(ValueError):