From adc0bca2c8816143ed05b95585a9ef2f16c65abf Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Wed, 15 May 2019 00:19:04 -0600 Subject: [PATCH 1/9] DEPR: Deprecate ordered=None for CategoricalDtype --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/dtypes/dtypes.py | 7 +++++++ pandas/tests/arrays/categorical/test_dtypes.py | 8 ++++++++ pandas/tests/dtypes/test_dtypes.py | 8 +++++++- pandas/tests/indexes/test_category.py | 8 ++++++++ pandas/tests/series/test_dtypes.py | 8 ++++++++ 6 files changed, 39 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 49518c57fc846..2b6817958196e 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -257,6 +257,7 @@ Deprecations - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) +- The default value ``ordered=None`` in :class:`~pandas.api.types.CategoricalDtype` has been deprecated in favor of ``ordered=False``. When converting between categorical types ``ordered=True`` must be explicitly passed in order to be preserved. (:issue:`26336`) .. _whatsnew_0250.prior_deprecations: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index da762978f55cc..d722b2495efc2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -550,8 +550,15 @@ def update_dtype(self, dtype): new_categories = self.categories new_ordered = dtype.ordered + + # TODO(GH26336): remove this if block when ordered=None is removed if new_ordered is None: new_ordered = self.ordered + if self.ordered: + msg = ("ordered=None is deprecated and will default to False " + "in a future version; ordered=True must be explicitly " + "passed in order to be retained") + warnings.warn(msg, FutureWarning, stacklevel=2) return CategoricalDtype(new_categories, new_ordered) diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index b8c223ab3b04e..5f261f29560ea 100644 --- a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -160,6 +160,14 @@ def test_astype_category(self, dtype_ordered, cat_ordered): expected = cat tm.assert_categorical_equal(result, expected) + def test_astype_category_ordered_none_deprecated(self): + # GH 26336 + cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) + cdt2 = CategoricalDtype(categories=list('cedafb')) + cat = Categorical(list('abcdaba'), dtype=cdt1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + cat.astype(cdt2) + def test_iter_python_types(self): # GH-19909 cat = Categorical([1, 2]) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index decd0091e2ce8..9cf51dcd286ef 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -817,7 +817,13 @@ def test_update_dtype(self, ordered_fixture, new_categories, new_ordered): if expected_ordered is None: expected_ordered = dtype.ordered - result = dtype.update_dtype(new_dtype) + # GH 26336 + if new_ordered is None and ordered_fixture is True: + with tm.assert_produces_warning(FutureWarning): + result = dtype.update_dtype(new_dtype) + else: + result = dtype.update_dtype(new_dtype) + tm.assert_index_equal(result.categories, expected_categories) assert result.ordered is expected_ordered diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index d38fa20a9335c..3b140e9228879 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -490,6 +490,14 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) + def test_astype_category_ordered_none_deprecated(self): + # GH 26336 + cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) + cdt2 = CategoricalDtype(categories=list('cedafb')) + idx = CategoricalIndex(list('abcdaba'), dtype=cdt1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + idx.astype(cdt2) + def test_reindex_base(self): # Determined by cat ordering. idx = CategoricalIndex(list("cab"), categories=list("cab")) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 51334557fa403..c7af68cc5cc54 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -227,6 +227,14 @@ def test_astype_categories_deprecation(self): result = s.astype('category', categories=['a', 'b'], ordered=True) tm.assert_series_equal(result, expected) + def test_astype_category_ordered_none_deprecated(self): + # GH 26336 + cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) + cdt2 = CategoricalDtype(categories=list('cedafb')) + s = Series(list('abcdaba'), dtype=cdt1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + s.astype(cdt2) + def test_astype_from_categorical(self): items = ["a", "b", "c", "a"] s = Series(items) From 2da967deb0dce05155bba22c5fc6a2d340352211 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 20 May 2019 23:57:50 -0600 Subject: [PATCH 2/9] cover additional case and review edits --- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/generic.py | 5 +++- pandas/core/internals/construction.py | 5 +++- pandas/io/packers.py | 7 ++++- .../tests/arrays/categorical/test_dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 3 ++- pandas/tests/indexes/test_category.py | 2 +- pandas/tests/series/test_constructors.py | 27 +++++++++++++++++++ 8 files changed, 46 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 94f98db005d3f..35d89dc1628fd 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -546,7 +546,7 @@ def update_dtype(self, dtype): msg = ("ordered=None is deprecated and will default to False " "in a future version; ordered=True must be explicitly " "passed in order to be retained") - warnings.warn(msg, FutureWarning, stacklevel=2) + warnings.warn(msg, FutureWarning, stacklevel=3) return CategoricalDtype(new_categories, new_ordered) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 76910f425836e..9812964fce5e5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -175,7 +175,10 @@ def is_copy(self, msg): def _validate_dtype(self, dtype): """ validate the passed dtype """ - if dtype is not None: + # GH 26336: don't convert 'category' to CategoricalDtype + if isinstance(dtype, str) and dtype == 'category': + pass + elif dtype is not None: dtype = pandas_dtype(dtype) # a compound dtype diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 863b9f7fb16d7..ca1d1bcda5a84 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -538,7 +538,10 @@ def sanitize_array(data, index, dtype=None, copy=False, Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. """ - if dtype is not None: + # GH 26336: don't convert 'category' to CategoricalDtype + if isinstance(dtype, str) and dtype == 'category': + pass + elif dtype is not None: dtype = pandas_dtype(dtype) if isinstance(data, ma.MaskedArray): diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 1309bd1fef421..abe3a087a10f6 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -623,7 +623,12 @@ def decode(obj): return Interval(obj['left'], obj['right'], obj['closed']) elif typ == 'series': dtype = dtype_for(obj['dtype']) - pd_dtype = pandas_dtype(dtype) + + # GH 26336: don't convert 'category' to CategoricalDtype + if isinstance(dtype, str) and dtype == 'category': + pd_dtype = dtype + else: + pd_dtype = pandas_dtype(dtype) index = obj['index'] result = Series(unconvert(obj['data'], dtype, obj['compress']), diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index 5f261f29560ea..14ad3c4d5e860 100644 --- a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -165,7 +165,7 @@ def test_astype_category_ordered_none_deprecated(self): cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) cdt2 = CategoricalDtype(categories=list('cedafb')) cat = Categorical(list('abcdaba'), dtype=cdt1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): cat.astype(cdt2) def test_iter_python_types(self): diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 9cf51dcd286ef..b5d5789a22c1e 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -819,7 +819,8 @@ def test_update_dtype(self, ordered_fixture, new_categories, new_ordered): # GH 26336 if new_ordered is None and ordered_fixture is True: - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): result = dtype.update_dtype(new_dtype) else: result = dtype.update_dtype(new_dtype) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 3b140e9228879..63bea066d7b99 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -495,7 +495,7 @@ def test_astype_category_ordered_none_deprecated(self): cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) cdt2 = CategoricalDtype(categories=list('cedafb')) idx = CategoricalIndex(list('abcdaba'), dtype=cdt1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): idx.astype(cdt2) def test_reindex_base(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f2345a0822f6d..e6119828338ef 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -365,6 +365,33 @@ def test_constructor_categorical_dtype(self): dtype=CategoricalDtype(['a', 'b'], ordered=True)) tm.assert_series_equal(result, expected, check_categorical=True) + def test_constructor_categorical_string(self): + # GH 26336: the string 'category' maintains existing CategoricalDtype + cdt = CategoricalDtype(categories=list('dabc'), ordered=True) + expected = Series(list('abcabc'), dtype=cdt) + + # Series(Categorical, dtype='category') keeps existing dtype + cat = Categorical(list('abcabc'), dtype=cdt) + result = Series(cat, dtype='category') + tm.assert_series_equal(result, expected) + + # Series(Series[Categorical], dtype='category') keeps existing dtype + result = Series(result, dtype='category') + tm.assert_series_equal(result, expected) + + def test_categorical_ordered_none_deprecated(self): + # GH 26336 + cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) + cdt2 = CategoricalDtype(categories=list('cedafb')) + + cat = Categorical(list('abcdaba'), dtype=cdt1) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + Series(cat, dtype=cdt2) + + s = Series(cat) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + Series(s, dtype=cdt2) + def test_categorical_sideeffects_free(self): # Passing a categorical to a Series and then changing values in either # the series or the categorical should not change the values in the From b03eadd927d5301f9d8845df73af09b8710b98ee Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Tue, 21 May 2019 00:02:02 -0600 Subject: [PATCH 3/9] fix whatsnew --- doc/source/whatsnew/v0.25.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3558b6af3a705..aabe5bc3a57c7 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -261,7 +261,8 @@ Deprecations - The deprecated ``.ix[]`` indexer now raises a more visible FutureWarning instead of DeprecationWarning (:issue:`26438`). - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) - +- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version. +- The default value ``ordered=None`` in :class:`~pandas.api.types.CategoricalDtype` has been deprecated in favor of ``ordered=False``. When converting between categorical types ``ordered=True`` must be explicitly passed in order to be preserved. (:issue:`26336`) .. _whatsnew_0250.prior_deprecations: From 77f171d9e6d2b388fb8fd2b4c6b424f4ba13b7fc Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Tue, 21 May 2019 00:06:10 -0600 Subject: [PATCH 4/9] update warning message --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/dtypes/dtypes.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index aabe5bc3a57c7..293681b02988b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -264,6 +264,7 @@ Deprecations - The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version. - The default value ``ordered=None`` in :class:`~pandas.api.types.CategoricalDtype` has been deprecated in favor of ``ordered=False``. When converting between categorical types ``ordered=True`` must be explicitly passed in order to be preserved. (:issue:`26336`) + .. _whatsnew_0250.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 35d89dc1628fd..160d2da18e4e4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -543,8 +543,10 @@ def update_dtype(self, dtype): if new_ordered is None: new_ordered = self.ordered if self.ordered: - msg = ("ordered=None is deprecated and will default to False " - "in a future version; ordered=True must be explicitly " + msg = ("Constructing a CategoricalDtype without specifying " + "`ordered` or explicitly passing `ordered=None` is " + "deprecated and will default to `ordered=False` in a " + "future version; `ordered=True` must be explicitly " "passed in order to be retained") warnings.warn(msg, FutureWarning, stacklevel=3) From cf12a1f7b00f40c2a6e3ac251ab9fad6caebd12b Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 21 May 2019 14:35:54 -0600 Subject: [PATCH 5/9] consolidate special casing --- pandas/core/generic.py | 5 +---- pandas/core/internals/construction.py | 5 +---- pandas/core/series.py | 8 +++++++- pandas/io/packers.py | 14 ++------------ 4 files changed, 11 insertions(+), 21 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9812964fce5e5..76910f425836e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -175,10 +175,7 @@ def is_copy(self, msg): def _validate_dtype(self, dtype): """ validate the passed dtype """ - # GH 26336: don't convert 'category' to CategoricalDtype - if isinstance(dtype, str) and dtype == 'category': - pass - elif dtype is not None: + if dtype is not None: dtype = pandas_dtype(dtype) # a compound dtype diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index ca1d1bcda5a84..863b9f7fb16d7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -538,10 +538,7 @@ def sanitize_array(data, index, dtype=None, copy=False, Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. """ - # GH 26336: don't convert 'category' to CategoricalDtype - if isinstance(dtype, str) and dtype == 'category': - pass - elif dtype is not None: + if dtype is not None: dtype = pandas_dtype(dtype) if isinstance(data, ma.MaskedArray): diff --git a/pandas/core/series.py b/pandas/core/series.py index 5b59fd6e7b38d..b1c632992de7a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -18,7 +18,7 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( - _is_unorderable_exception, ensure_platform_int, is_bool, + _is_unorderable_exception, ensure_platform_int, is_bool, is_categorical, is_categorical_dtype, is_datetime64_dtype, is_datetimelike, is_dict_like, is_extension_array_dtype, is_extension_type, is_hashable, is_integer, is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) @@ -168,6 +168,12 @@ def __init__(self, data=None, index=None, dtype=None, name=None, if data is None: data = {} if dtype is not None: + # GH 26336: explicitly handle 'category' to avoid warning + # TODO: Remove after CategoricalDtype defaults to ordered=False + if (isinstance(dtype, str) and dtype == 'category' and + is_categorical(data)): + dtype = data.dtype + dtype = self._validate_dtype(dtype) if isinstance(data, MultiIndex): diff --git a/pandas/io/packers.py b/pandas/io/packers.py index abe3a087a10f6..5678bda02c844 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -623,19 +623,9 @@ def decode(obj): return Interval(obj['left'], obj['right'], obj['closed']) elif typ == 'series': dtype = dtype_for(obj['dtype']) - - # GH 26336: don't convert 'category' to CategoricalDtype - if isinstance(dtype, str) and dtype == 'category': - pd_dtype = dtype - else: - pd_dtype = pandas_dtype(dtype) - index = obj['index'] - result = Series(unconvert(obj['data'], dtype, obj['compress']), - index=index, - dtype=pd_dtype, - name=obj['name']) - return result + data = unconvert(obj['data'], dtype, obj['compress']) + return Series(data, index=index, dtype=dtype, name=obj['name']) elif typ == 'block_manager': axes = obj['axes'] From bc16e98eeca214c414eed65ee8637b0b3f334b5e Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 1 Jul 2019 11:15:13 -0600 Subject: [PATCH 6/9] .ordered warning and keep None as non-default --- pandas/core/arrays/categorical.py | 10 ++--- pandas/core/dtypes/dtypes.py | 53 +++++++++++++++--------- pandas/core/internals/construction.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 25 +++++++---- pandas/tests/indexes/test_category.py | 12 +++--- pandas/tests/series/test_constructors.py | 14 ++++--- pandas/tests/series/test_dtypes.py | 13 +++--- 7 files changed, 80 insertions(+), 49 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3ef2f41f25338..9835b9d03c64b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -331,7 +331,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, # sanitize input if is_categorical_dtype(values): if dtype.categories is None: - dtype = CategoricalDtype(values.categories, dtype.ordered) + dtype = CategoricalDtype(values.categories, dtype._ordered) elif not isinstance(values, (ABCIndexClass, ABCSeries)): # sanitize_array coerces np.nan to a string under certain versions # of numpy @@ -354,7 +354,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, codes, categories = factorize(values, sort=True) except TypeError: codes, categories = factorize(values, sort=False) - if dtype.ordered: + if dtype._ordered: # raise, as we don't have a sortable data structure and so # the user should give us one by specifying categories raise TypeError("'values' is not ordered, please " @@ -367,7 +367,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, "supported at this time") # we're inferring from values - dtype = CategoricalDtype(categories, dtype.ordered) + dtype = CategoricalDtype(categories, dtype._ordered) elif is_categorical_dtype(values): old_codes = (values._values.codes if isinstance(values, ABCSeries) @@ -432,7 +432,7 @@ def ordered(self): """ Whether the categories have an ordered relationship. """ - return self.dtype.ordered + return self.dtype._ordered @property def dtype(self) -> CategoricalDtype: @@ -846,7 +846,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, """ inplace = validate_bool_kwarg(inplace, 'inplace') if ordered is None: - ordered = self.dtype.ordered + ordered = self.dtype._ordered new_dtype = CategoricalDtype(new_categories, ordered=ordered) cat = self if inplace else self.copy() diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 3dfa84a31bc2b..250fe38765165 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -17,6 +17,10 @@ str_type = str +# sentinel value used for the default value of ordered in the CategoricalDtype +# constructor to detect when ordered=None is explicitly passed (GH 26403) +sentinel = object() # type: object + def register_extension_dtype(cls: Type[ExtensionDtype], ) -> Type[ExtensionDtype]: @@ -214,7 +218,9 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): _metadata = ('categories', 'ordered') _cache = {} # type: Dict[str_type, PandasExtensionDtype] - def __init__(self, categories=None, ordered: Optional[bool] = None): + def __init__(self, + categories=None, + ordered: Union[None, bool, object] = sentinel): self._finalize(categories, ordered, fastpath=False) @classmethod @@ -334,7 +340,7 @@ def _finalize(self, fastpath: bool = False, ) -> None: - if ordered is not None: + if ordered is not None and ordered is not sentinel: self.validate_ordered(ordered) if categories is not None: @@ -342,7 +348,8 @@ def _finalize(self, fastpath=fastpath) self._categories = categories - self._ordered = ordered + self._ordered = ordered if ordered is not sentinel else None + self._ordered_from_sentinel = ordered is sentinel def __setstate__(self, state: Dict[str_type, Any]) -> None: # for pickle compat. __get_state__ is defined in the @@ -355,12 +362,12 @@ def __hash__(self) -> int: # _hash_categories returns a uint64, so use the negative # space for when we have unknown categories to avoid a conflict if self.categories is None: - if self.ordered: + if self._ordered: return -1 else: return -2 # We *do* want to include the real self.ordered here - return int(self._hash_categories(self.categories, self.ordered)) + return int(self._hash_categories(self.categories, self._ordered)) def __eq__(self, other: Any) -> bool: """ @@ -379,7 +386,7 @@ def __eq__(self, other: Any) -> bool: return other == self.name elif other is self: return True - elif not (hasattr(other, 'ordered') and hasattr(other, 'categories')): + elif not (hasattr(other, '_ordered') and hasattr(other, 'categories')): return False elif self.categories is None or other.categories is None: # We're forced into a suboptimal corner thanks to math and @@ -388,10 +395,10 @@ def __eq__(self, other: Any) -> bool: # CDT(., .) = CDT(None, False) and *all* # CDT(., .) = CDT(None, True). return True - elif self.ordered or other.ordered: + elif self._ordered or other._ordered: # At least one has ordered=True; equal if both have ordered=True # and the same values for categories in the same order. - return ((self.ordered == other.ordered) and + return ((self._ordered == other._ordered) and self.categories.equals(other.categories)) else: # Neither has ordered=True; equal if both have the same categories, @@ -406,7 +413,7 @@ def __repr__(self): data = "None, " else: data = self.categories._format_data(name=self.__class__.__name__) - return tpl.format(data, self.ordered) + return tpl.format(data, self._ordered) @staticmethod def _hash_categories(categories, ordered: Optional[bool] = True) -> int: @@ -534,25 +541,23 @@ def update_dtype(self, dtype: 'CategoricalDtype') -> 'CategoricalDtype': msg = ('a CategoricalDtype must be passed to perform an update, ' 'got {dtype!r}').format(dtype=dtype) raise ValueError(msg) - elif dtype.categories is not None and dtype.ordered is self.ordered: - return dtype # dtype is CDT: keep current categories/ordered if None new_categories = dtype.categories if new_categories is None: new_categories = self.categories - new_ordered = dtype.ordered - - # TODO(GH26336): remove this if block when ordered=None is removed + new_ordered = dtype._ordered + new_ordered_from_sentinel = dtype._ordered_from_sentinel if new_ordered is None: - new_ordered = self.ordered - if self.ordered: + # maintain existing ordered if new dtype has ordered=None + new_ordered = self._ordered + if self._ordered and new_ordered_from_sentinel: + # only warn if we'd actually change the existing behavior msg = ("Constructing a CategoricalDtype without specifying " - "`ordered` or explicitly passing `ordered=None` is " - "deprecated and will default to `ordered=False` in a " - "future version; `ordered=True` must be explicitly " - "passed in order to be retained") + "`ordered` will default to `ordered=False` in a future " + "version; `ordered=True` must be explicitly passed in " + "order to be retained") warnings.warn(msg, FutureWarning, stacklevel=3) return CategoricalDtype(new_categories, new_ordered) @@ -569,6 +574,14 @@ def ordered(self) -> Optional[bool]: """ Whether the categories have an ordered relationship. """ + # TODO: remove if block when ordered=None as default is deprecated + if self._ordered_from_sentinel and self._ordered is None: + # warn when accessing ordered if ordered=None and None was not + # explicitly passed to the constructor + msg = ("Constructing a CategoricalDtype without specifying " + "`ordered` will default to `ordered=False` in a future " + "version; `ordered=None` must be explicitly passed.") + warnings.warn(msg, FutureWarning, stacklevel=2) return self._ordered @property diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index bdfb854679a2c..1044f25a6bbcd 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -725,7 +725,7 @@ def _try_cast(arr, dtype, copy, raise_cast_failure): # We *do* allow casting to categorical, since we know # that Categorical is the only array type for 'category'. subarr = Categorical(arr, dtype.categories, - ordered=dtype.ordered) + ordered=dtype._ordered) elif is_extension_array_dtype(dtype): # create an extension array from its dtype array_type = dtype.construct_array_type()._from_sequence diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 1546924d4d99b..56f1a52eb99cd 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,7 +10,8 @@ is_datetime64tz_dtype, is_datetimetz, is_dtype_equal, is_interval_dtype, is_period, is_period_dtype, is_string_dtype) from pandas.core.dtypes.dtypes import ( - CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype, registry) + CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype, registry, + sentinel) import pandas as pd from pandas import ( @@ -54,7 +55,8 @@ def test_pickle(self): class TestCategoricalDtype(Base): def create(self): - return CategoricalDtype() + # TODO(GH 26403): Remove when default ordered becomes False + return CategoricalDtype(ordered=None) def test_pickle(self): # make sure our cache is NOT pickled @@ -675,7 +677,8 @@ def test_unordered_same(self, ordered): def test_categories(self): result = CategoricalDtype(['a', 'b', 'c']) tm.assert_index_equal(result.categories, pd.Index(['a', 'b', 'c'])) - assert result.ordered is None + with tm.assert_produces_warning(FutureWarning): + assert result.ordered is None def test_equal_but_different(self, ordered_fixture): c1 = CategoricalDtype([1, 2, 3]) @@ -804,7 +807,7 @@ def test_categorical_categories(self): @pytest.mark.parametrize('new_categories', [ list('abc'), list('cba'), list('wxyz'), None]) - @pytest.mark.parametrize('new_ordered', [True, False, None]) + @pytest.mark.parametrize('new_ordered', [True, False, None, sentinel]) def test_update_dtype(self, ordered_fixture, new_categories, new_ordered): dtype = CategoricalDtype(list('abc'), ordered_fixture) new_dtype = CategoricalDtype(new_categories, new_ordered) @@ -813,12 +816,12 @@ def test_update_dtype(self, ordered_fixture, new_categories, new_ordered): if expected_categories is None: expected_categories = dtype.categories - expected_ordered = new_dtype.ordered - if expected_ordered is None: + expected_ordered = new_ordered + if new_ordered is sentinel or new_ordered is None: expected_ordered = dtype.ordered # GH 26336 - if new_ordered is None and ordered_fixture is True: + if new_ordered is sentinel and ordered_fixture is True: with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = dtype.update_dtype(new_dtype) @@ -844,6 +847,14 @@ def test_update_dtype_errors(self, bad_dtype): with pytest.raises(ValueError, match=msg): dtype.update_dtype(bad_dtype) + @pytest.mark.parametrize('ordered', [sentinel, None, True, False]) + def test_ordered_none_default_deprecated(self, ordered): + # GH 26403: CDT.ordered only warns if ordered is not explicitly passed + dtype = CategoricalDtype(list('abc'), ordered=ordered) + warning = FutureWarning if ordered is sentinel else None + with tm.assert_produces_warning(warning): + dtype.ordered + @pytest.mark.parametrize('dtype', [ CategoricalDtype, diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 93cd84afff37f..21aa19f835495 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -5,7 +5,7 @@ from pandas._libs import index as libindex -from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.dtypes import CategoricalDtype, sentinel import pandas as pd from pandas import Categorical, IntervalIndex @@ -490,12 +490,14 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_astype_category_ordered_none_deprecated(self): - # GH 26336 + @pytest.mark.parametrize('none, warning', [ + (None, None), (sentinel, FutureWarning)]) + def test_astype_category_ordered_none_deprecated(self, none, warning): + # GH 26336: only warn if None is not explicitly passed cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) - cdt2 = CategoricalDtype(categories=list('cedafb')) + cdt2 = CategoricalDtype(categories=list('cedafb'), ordered=none) idx = CategoricalIndex(list('abcdaba'), dtype=cdt1) - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(warning): idx.astype(cdt2) def test_reindex_base(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index cbbda661f29bc..57529c88c1b5b 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -12,12 +12,12 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64tz_dtype) +from pandas.core.dtypes.dtypes import CategoricalDtype, sentinel import pandas as pd from pandas import ( Categorical, DataFrame, Index, IntervalIndex, MultiIndex, NaT, Series, Timestamp, date_range, isna, period_range, timedelta_range) -from pandas.api.types import CategoricalDtype from pandas.core.arrays import period_array import pandas.util.testing as tm from pandas.util.testing import assert_series_equal @@ -386,17 +386,19 @@ def test_constructor_categorical_string(self): result = Series(result, dtype='category') tm.assert_series_equal(result, expected) - def test_categorical_ordered_none_deprecated(self): - # GH 26336 + @pytest.mark.parametrize('none, warning', [ + (None, None), (sentinel, FutureWarning)]) + def test_categorical_ordered_none_deprecated(self, none, warning): + # GH 26336: only warn if None is not explicitly passed cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) - cdt2 = CategoricalDtype(categories=list('cedafb')) + cdt2 = CategoricalDtype(categories=list('cedafb'), ordered=none) cat = Categorical(list('abcdaba'), dtype=cdt1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(warning, check_stacklevel=False): Series(cat, dtype=cdt2) s = Series(cat) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(warning, check_stacklevel=False): Series(s, dtype=cdt2) def test_categorical_sideeffects_free(self): diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 34552e0bbcf4d..eea2aef2ca026 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -8,10 +8,11 @@ from pandas._libs.tslibs import iNaT +from pandas.core.dtypes.dtypes import CategoricalDtype, sentinel + import pandas as pd from pandas import ( Categorical, DataFrame, Index, Series, Timedelta, Timestamp, date_range) -from pandas.api.types import CategoricalDtype import pandas.util.testing as tm @@ -233,12 +234,14 @@ def test_astype_categories_deprecation(self): result = s.astype('category', categories=['a', 'b'], ordered=True) tm.assert_series_equal(result, expected) - def test_astype_category_ordered_none_deprecated(self): - # GH 26336 + @pytest.mark.parametrize('none, warning', [ + (None, None), (sentinel, FutureWarning)]) + def test_astype_category_ordered_none_deprecated(self, none, warning): + # GH 26336: only warn if None is not explicitly passed cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) - cdt2 = CategoricalDtype(categories=list('cedafb')) + cdt2 = CategoricalDtype(categories=list('cedafb'), ordered=none) s = Series(list('abcdaba'), dtype=cdt1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(warning, check_stacklevel=False): s.astype(cdt2) def test_astype_from_categorical(self): From 99b6a303482f6614e462e31d67752a227dbd44f1 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 1 Jul 2019 12:58:44 -0600 Subject: [PATCH 7/9] fix typing and docs --- doc/source/whatsnew/v0.23.0.rst | 22 ++++++++++++++++------ pandas/core/dtypes/dtypes.py | 19 +++++++++++-------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index 41c946cc9a559..62cf977d8c8ac 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -935,13 +935,23 @@ In previous versions, the default value for the ``ordered`` parameter was ``Fals New behavior: -.. ipython:: python +.. code-block:: ipython - from pandas.api.types import CategoricalDtype - cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) - cat - cdt = CategoricalDtype(categories=list('cbad')) - cat.astype(cdt) + In [2]: from pandas.api.types import CategoricalDtype + + In [3]: cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) + + In [4]: cat + Out[4]: + [a, b, c, a, b, a] + Categories (3, object): [c < b < a] + + In [5]: cdt = CategoricalDtype(categories=list('cbad')) + + In [6]: cat.astype(cdt) + Out[6]: + [a, b, c, a, b, a] + Categories (4, object): [c < b < a < d] Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 250fe38765165..85c97699cf63f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -17,10 +17,13 @@ str_type = str -# sentinel value used for the default value of ordered in the CategoricalDtype -# constructor to detect when ordered=None is explicitly passed (GH 26403) +# GH26403: sentinel value used for the default value of ordered in the +# CategoricalDtype constructor to detect when ordered=None is explicitly passed sentinel = object() # type: object +# TODO(GH26403): Replace with Optional[bool] or bool +OrderedType = Union[None, bool, object] + def register_extension_dtype(cls: Type[ExtensionDtype], ) -> Type[ExtensionDtype]: @@ -220,7 +223,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): def __init__(self, categories=None, - ordered: Union[None, bool, object] = sentinel): + ordered: OrderedType = sentinel): self._finalize(categories, ordered, fastpath=False) @classmethod @@ -236,7 +239,7 @@ def _from_fastpath(cls, def _from_categorical_dtype(cls, dtype: 'CategoricalDtype', categories=None, - ordered: Optional[bool] = None, + ordered: OrderedType = None, ) -> 'CategoricalDtype': if categories is ordered is None: return dtype @@ -336,7 +339,7 @@ def _from_values_or_dtype(cls, def _finalize(self, categories, - ordered: Optional[bool], + ordered: OrderedType, fastpath: bool = False, ) -> None: @@ -416,7 +419,7 @@ def __repr__(self): return tpl.format(data, self._ordered) @staticmethod - def _hash_categories(categories, ordered: Optional[bool] = True) -> int: + def _hash_categories(categories, ordered: OrderedType = True) -> int: from pandas.core.util.hashing import ( hash_array, _combine_hash_arrays, hash_tuples ) @@ -466,7 +469,7 @@ def construct_array_type(cls): return Categorical @staticmethod - def validate_ordered(ordered: bool) -> None: + def validate_ordered(ordered: OrderedType) -> None: """ Validates that we have a valid ordered parameter. If it is not a boolean, a TypeError will be raised. @@ -570,7 +573,7 @@ def categories(self): return self._categories @property - def ordered(self) -> Optional[bool]: + def ordered(self) -> OrderedType: """ Whether the categories have an ordered relationship. """ From d999bd8c0c9b15cddd97bcb3d0e8241db771819e Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 1 Jul 2019 13:11:39 -0600 Subject: [PATCH 8/9] sentinel --> ordered_sentinel --- pandas/core/dtypes/dtypes.py | 10 +++++----- pandas/tests/dtypes/test_dtypes.py | 15 ++++++++------- pandas/tests/indexes/test_category.py | 4 ++-- pandas/tests/series/test_constructors.py | 4 ++-- pandas/tests/series/test_dtypes.py | 4 ++-- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 85c97699cf63f..fc2057b74122c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -19,7 +19,7 @@ # GH26403: sentinel value used for the default value of ordered in the # CategoricalDtype constructor to detect when ordered=None is explicitly passed -sentinel = object() # type: object +ordered_sentinel = object() # type: object # TODO(GH26403): Replace with Optional[bool] or bool OrderedType = Union[None, bool, object] @@ -223,7 +223,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): def __init__(self, categories=None, - ordered: OrderedType = sentinel): + ordered: OrderedType = ordered_sentinel): self._finalize(categories, ordered, fastpath=False) @classmethod @@ -343,7 +343,7 @@ def _finalize(self, fastpath: bool = False, ) -> None: - if ordered is not None and ordered is not sentinel: + if ordered is not None and ordered is not ordered_sentinel: self.validate_ordered(ordered) if categories is not None: @@ -351,8 +351,8 @@ def _finalize(self, fastpath=fastpath) self._categories = categories - self._ordered = ordered if ordered is not sentinel else None - self._ordered_from_sentinel = ordered is sentinel + self._ordered = ordered if ordered is not ordered_sentinel else None + self._ordered_from_sentinel = ordered is ordered_sentinel def __setstate__(self, state: Dict[str_type, Any]) -> None: # for pickle compat. __get_state__ is defined in the diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 56f1a52eb99cd..ad2195af73d96 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,8 +10,8 @@ is_datetime64tz_dtype, is_datetimetz, is_dtype_equal, is_interval_dtype, is_period, is_period_dtype, is_string_dtype) from pandas.core.dtypes.dtypes import ( - CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype, registry, - sentinel) + CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype, + ordered_sentinel, registry) import pandas as pd from pandas import ( @@ -807,7 +807,8 @@ def test_categorical_categories(self): @pytest.mark.parametrize('new_categories', [ list('abc'), list('cba'), list('wxyz'), None]) - @pytest.mark.parametrize('new_ordered', [True, False, None, sentinel]) + @pytest.mark.parametrize('new_ordered', [ + True, False, None, ordered_sentinel]) def test_update_dtype(self, ordered_fixture, new_categories, new_ordered): dtype = CategoricalDtype(list('abc'), ordered_fixture) new_dtype = CategoricalDtype(new_categories, new_ordered) @@ -817,11 +818,11 @@ def test_update_dtype(self, ordered_fixture, new_categories, new_ordered): expected_categories = dtype.categories expected_ordered = new_ordered - if new_ordered is sentinel or new_ordered is None: + if new_ordered is ordered_sentinel or new_ordered is None: expected_ordered = dtype.ordered # GH 26336 - if new_ordered is sentinel and ordered_fixture is True: + if new_ordered is ordered_sentinel and ordered_fixture is True: with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = dtype.update_dtype(new_dtype) @@ -847,11 +848,11 @@ def test_update_dtype_errors(self, bad_dtype): with pytest.raises(ValueError, match=msg): dtype.update_dtype(bad_dtype) - @pytest.mark.parametrize('ordered', [sentinel, None, True, False]) + @pytest.mark.parametrize('ordered', [ordered_sentinel, None, True, False]) def test_ordered_none_default_deprecated(self, ordered): # GH 26403: CDT.ordered only warns if ordered is not explicitly passed dtype = CategoricalDtype(list('abc'), ordered=ordered) - warning = FutureWarning if ordered is sentinel else None + warning = FutureWarning if ordered is ordered_sentinel else None with tm.assert_produces_warning(warning): dtype.ordered diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 21aa19f835495..3f2c4f3c2c69d 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -5,7 +5,7 @@ from pandas._libs import index as libindex -from pandas.core.dtypes.dtypes import CategoricalDtype, sentinel +from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel import pandas as pd from pandas import Categorical, IntervalIndex @@ -491,7 +491,7 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): tm.assert_index_equal(result, expected) @pytest.mark.parametrize('none, warning', [ - (None, None), (sentinel, FutureWarning)]) + (None, None), (ordered_sentinel, FutureWarning)]) def test_astype_category_ordered_none_deprecated(self, none, warning): # GH 26336: only warn if None is not explicitly passed cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 57529c88c1b5b..663d5ae505303 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -12,7 +12,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64tz_dtype) -from pandas.core.dtypes.dtypes import CategoricalDtype, sentinel +from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel import pandas as pd from pandas import ( @@ -387,7 +387,7 @@ def test_constructor_categorical_string(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize('none, warning', [ - (None, None), (sentinel, FutureWarning)]) + (None, None), (ordered_sentinel, FutureWarning)]) def test_categorical_ordered_none_deprecated(self, none, warning): # GH 26336: only warn if None is not explicitly passed cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index eea2aef2ca026..f919830bd8eeb 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -8,7 +8,7 @@ from pandas._libs.tslibs import iNaT -from pandas.core.dtypes.dtypes import CategoricalDtype, sentinel +from pandas.core.dtypes.dtypes import CategoricalDtype, ordered_sentinel import pandas as pd from pandas import ( @@ -235,7 +235,7 @@ def test_astype_categories_deprecation(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize('none, warning', [ - (None, None), (sentinel, FutureWarning)]) + (None, None), (ordered_sentinel, FutureWarning)]) def test_astype_category_ordered_none_deprecated(self, none, warning): # GH 26336: only warn if None is not explicitly passed cdt1 = CategoricalDtype(categories=list('cdab'), ordered=True) From fdb57708a2e26a2be1c8e9a334e16d13aae6e007 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Mon, 1 Jul 2019 14:03:19 -0600 Subject: [PATCH 9/9] add more detail to warning --- pandas/core/dtypes/dtypes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index fc2057b74122c..d8d910a16e32a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -559,8 +559,9 @@ def update_dtype(self, dtype: 'CategoricalDtype') -> 'CategoricalDtype': # only warn if we'd actually change the existing behavior msg = ("Constructing a CategoricalDtype without specifying " "`ordered` will default to `ordered=False` in a future " - "version; `ordered=True` must be explicitly passed in " - "order to be retained") + "version, which will cause the resulting categorical's " + "`ordered` attribute to change to False; `ordered=True`" + " must be explicitly passed in order to be retained") warnings.warn(msg, FutureWarning, stacklevel=3) return CategoricalDtype(new_categories, new_ordered)