diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 812bc2e031d78..1c9e876d77bf8 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -157,11 +157,10 @@ The values have been correctly interpreted as integers. The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a ``Series`` with categorical type will now return an instance of -``CategoricalDtype``. For the most part, this is backwards compatible, though -the string repr has changed. If you were previously using ``str(s.dtype) == -'category'`` to detect categorical data, switch to -:func:`pandas.api.types.is_categorical_dtype`, which is compatible with the old -and new ``CategoricalDtype``. +``CategoricalDtype``. This change should be backwards compatible, though the +repr has changed. ``str(CategoricalDtype())`` is still the string +``'category'``, but the preferred way to detect categorical data is to use +:func:`pandas.api.types.is_categorical_dtype`. See the :ref:`CategoricalDtype docs <categorical.categoricaldtype>` for more. 
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d2487905caced..4d97b7d17a6dc 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -220,7 +220,7 @@ def __eq__(self, other): # both unordered; this could probably be optimized / cached return hash(self) == hash(other) - def __unicode__(self): + def __repr__(self): tpl = u'CategoricalDtype(categories={}ordered={})' if self.categories is None: data = u"None, " diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index be3e5fdc467d3..0b9e2c9fe5ffc 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import re import pytest from itertools import product @@ -649,3 +650,10 @@ def test_from_categorical_dtype_both(self): result = CategoricalDtype._from_categorical_dtype( c1, categories=[1, 2], ordered=False) assert result == CategoricalDtype([1, 2], ordered=False) + + def test_str_vs_repr(self): + c1 = CategoricalDtype(['a', 'b']) + assert str(c1) == 'category' + # Py2 will have unicode prefixes + pat = r"CategoricalDtype\(categories=\[.*\], ordered=False\)" + assert re.match(pat, repr(c1)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 9f5e4f2ac4b6e..6495d748e3823 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1784,7 +1784,8 @@ class TestNLargestNSmallest(object): # not supported on some archs # Series([3., 2, 1, 2, 5], dtype='complex256'), Series([3., 2, 1, 2, 5], dtype='complex128'), - Series(list('abcde'))]) + Series(list('abcde')), + Series(list('abcde'), dtype='category')]) def test_error(self, r): dt = r.dtype msg = ("Cannot use method 'n(larg|small)est' with " @@ -1795,16 +1796,6 @@ def test_error(self, r): with tm.assert_raises_regex(TypeError, msg): method(arg) - def test_error_categorical_dtype(self): - # same as test_error, but regex hard to escape 
properly - msg = ("Cannot use method 'n(larg|small)est' with dtype " - "CategoricalDtype.+") - with tm.assert_raises_regex(TypeError, msg): - Series(list('ab'), dtype='category').nlargest(2) - - with tm.assert_raises_regex(TypeError, msg): - Series(list('ab'), dtype='category').nsmallest(2) - @pytest.mark.parametrize( "s", [v for k, v in s_main_dtypes().iteritems()])