pandas-dev · jreback · Jun 21, 2018 · May 24, 2018 · May 24, 2018 · Jun 12, 2018
diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt
@@ -100,6 +100,7 @@ Bug Fixes
 - Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue: `21078`)
 - Bug in :class:`Timedelta`: where passing a float with a unit would prematurely round the float precision (:issue: `14156`)
 - Bug in :func:`pandas.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`)
+- Bug in rendering :class:`Series` with ``Categorical`` dtype that could raise ``UnicodeDecodeError`` in rare conditions under Python 2.7 (:issue:`21002`)
 
 **Sparse**
 

diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
@@ -8,8 +8,7 @@ import numpy as np
 from numpy cimport ndarray, uint8_t, uint32_t, uint64_t
 
 from util cimport _checknull
-from cpython cimport (PyString_Check,
-                      PyBytes_Check,
+from cpython cimport (PyBytes_Check,
                       PyUnicode_Check)
 from libc.stdlib cimport malloc, free
 
@@ -62,9 +61,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'):
     cdef list datas = []
     for i in range(n):
         val = arr[i]
-        if PyString_Check(val):
-            data = <bytes>val.encode(encoding)
-        elif PyBytes_Check(val):
+        if PyBytes_Check(val):
             data = <bytes>val
         elif PyUnicode_Check(val):
             data = <bytes>val.encode(encoding)

diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py
@@ -202,6 +202,42 @@ def test_latex_repr(self):
 
 class TestCategoricalRepr(object):
 
+    def test_categorical_repr_unicode(self):
+        # GH#21002: under PY2 with sys.getdefaultencoding() == 'ascii' and
+        # len(index) > 60, rendering a Categorical could raise
+        # UnicodeDecodeError by trying to decode when it shouldn't
+        from pandas.core.base import StringMixin
+
+        class County(StringMixin):
+            name = u'San Sebastián'  # contains a non-ASCII character
+            state = u'PR'
+
+            def __unicode__(self):
+                return self.name + u', ' + self.state
+
+        cat = pd.Categorical([County() for n in range(61)])  # len 61 (> 60)
+        idx = pd.Index(cat)
+        ser = idx.to_series()
+
+        if compat.PY3:
+            # no reloading of sys; just check that rendering works under
+            # the default encoding (utf8)
+            repr(ser)
+            str(ser)
+
+        else:
+            # reload(sys) makes sys.setdefaultencoding available again so
+            # the default can be forced to ascii for the duration of the test
+            enc = sys.getdefaultencoding()
+            reload(sys)  # noqa:F821
+            sys.setdefaultencoding('ascii')
+            try:
+                repr(ser)  # passes as long as no exception is raised
+                str(ser)
+            finally:
+                # always restore the original default encoding
+                sys.setdefaultencoding(enc)
+
     def test_categorical_repr(self):
         a = Series(Categorical([1, 2, 3, 4]))
         exp = u("0    1\n1    2\n2    3\n3    4\n" +