pandas-dev · jreback · Jun 24, 2020 · May 17, 2020 · May 17, 2020 · May 21, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -825,6 +825,7 @@ Categorical
 - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`)
 - Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`)
 - :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`)
+- Repr of :class:`Categorical` was not distinguishing between int and str (:issue:`33676`)
 
 Datetimelike
 ^^^^^^^^^^^^

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -604,8 +604,8 @@ def factorize(
     >>> codes
     array([0, 0, 1]...)
     >>> uniques
-    [a, c]
-    Categories (3, object): [a, b, c]
+    ['a', 'c']
+    Categories (3, object): ['a', 'b', 'c']
 
     Notice that ``'b'`` is in ``uniques.categories``, despite not being
     present in ``cat.values``.

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -847,13 +847,13 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray"
         >>> cat = pd.Categorical(['a', 'b', 'c'])
         >>> cat
         [a, b, c]
-        Categories (3, object): [a, b, c]
+        Categories (3, object): ['a', 'b', 'c']
         >>> cat.repeat(2)
         [a, a, b, b, c, c]
-        Categories (3, object): [a, b, c]
+        Categories (3, object): ['a', 'b', 'c']
         >>> cat.repeat([1, 2, 3])
         [a, b, b, c, c, c]
-        Categories (3, object): [a, b, c]
+        Categories (3, object): ['a', 'b', 'c']
         """
 
     @Substitution(klass="ExtensionArray")

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1,3 +1,5 @@
+from csv import QUOTE_NONNUMERIC
+from functools import partial
 import operator
 from shutil import get_terminal_size
 from typing import Dict, Hashable, List, Type, Union, cast
@@ -276,7 +278,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject):
 
     >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'])
     [a, b, c, a, b, c]
-    Categories (3, object): [a, b, c]
+    Categories (3, object): ['a', 'b', 'c']
 
     Ordered `Categoricals` can be sorted according to the custom order
     of the categories and can have a min and max value.
@@ -1129,10 +1131,10 @@ def map(self, mapper):
         >>> cat = pd.Categorical(['a', 'b', 'c'])
         >>> cat
         [a, b, c]
-        Categories (3, object): [a, b, c]
+        Categories (3, object): ['a', 'b', 'c']
         >>> cat.map(lambda x: x.upper())
         [A, B, C]
-        Categories (3, object): [A, B, C]
+        Categories (3, object): ['A', 'B', 'C']
         >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'})
         [first, second, third]
         Categories (3, object): [first, second, third]
@@ -1872,13 +1874,16 @@ def _repr_categories(self):
         )
         from pandas.io.formats import format as fmt
 
+        format_array = partial(
+            fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC
+        )
         if len(self.categories) > max_categories:
             num = max_categories // 2
-            head = fmt.format_array(self.categories[:num], None)
-            tail = fmt.format_array(self.categories[-num:], None)
+            head = format_array(self.categories[:num])
+            tail = format_array(self.categories[-num:])
             category_strs = head + ["..."] + tail
         else:
-            category_strs = fmt.format_array(self.categories, None)
+            category_strs = format_array(self.categories)
 
         # Strip all leading spaces, which format_array adds for columns...
         category_strs = [x.strip() for x in category_strs]
@@ -2052,7 +2057,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
         >>> c = pd.Categorical(list('aabca'))
         >>> c
         [a, a, b, c, a]
-        Categories (3, object): [a, b, c]
+        Categories (3, object): ['a', 'b', 'c']
         >>> c.categories
         Index(['a', 'b', 'c'], dtype='object')
         >>> c.codes
@@ -2465,7 +2470,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
     4    c
     5    c
     dtype: category
-    Categories (3, object): [a, b, c]
+    Categories (3, object): ['a', 'b', 'c']
 
     >>> s.cat.categories
     Index(['a', 'b', 'c'], dtype='object')
@@ -2519,7 +2524,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
     4    c
     5    c
     dtype: category
-    Categories (3, object): [a, b, c]
+    Categories (3, object): ['a', 'b', 'c']
 
     >>> s.cat.set_categories(list("abcde"))
     0    a
@@ -2549,7 +2554,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin):
     4    c
     5    c
     dtype: category
-    Categories (3, object): [a, b, c]
+    Categories (3, object): ['a', 'b', 'c']
     """
 
     def __init__(self, data):

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -743,8 +743,8 @@ def array(self) -> ExtensionArray:
 
         >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
         >>> ser.array
-        [a, b, a]
-        Categories (2, object): [a, b]
+        ['a', 'b', 'a']
+        Categories (2, object): ['a', 'b']
         """
         raise AbstractMethodError(self)
 
@@ -1481,8 +1481,8 @@ def factorize(self, sort=False, na_sentinel=-1):
         ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
         ... )
         >>> ser
-        [apple, bread, bread, cheese, milk]
-        Categories (4, object): [apple < bread < cheese < milk]
+        ['apple', 'bread', 'bread', 'cheese', 'milk']
+        Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk']
 
         >>> ser.searchsorted('bread')
         1

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -217,15 +217,15 @@ def array(
     You can use the string alias for `dtype`
 
     >>> pd.array(['a', 'b', 'a'], dtype='category')
-    [a, b, a]
-    Categories (2, object): [a, b]
+    ['a', 'b', 'a']
+    Categories (2, object): ['a', 'b']
 
     Or specify the actual dtype
 
     >>> pd.array(['a', 'b', 'a'],
     ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
-    [a, b, a]
-    Categories (3, object): [a < b < c]
+    ['a', 'b', 'a']
+    Categories (3, object): ['a' < 'b' < 'c']
 
     If pandas does not infer a dedicated extension type a
     :class:`arrays.PandasArray` is returned.
@@ -357,8 +357,8 @@ def extract_array(obj, extract_numpy: bool = False):
     Examples
     --------
     >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
-    [a, b, c]
-    Categories (3, object): [a, b, c]
+    ['a', 'b', 'c']
+    Categories (3, object): ['a', 'b', 'c']
 
     Other objects like lists, arrays, and DataFrames are just passed through.
 

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -237,7 +237,7 @@ def union_categoricals(
 
     >>> union_categoricals([a, b], sort_categories=True)
     [b, c, a, b]
-    Categories (3, object): [a, b, c]
+    Categories (3, object): ['a', 'b', 'c']
 
     `union_categoricals` also works with the case of combining two
     categoricals of the same categories and order information (e.g. what
@@ -267,7 +267,7 @@ def union_categoricals(
     >>> b = pd.Categorical(["c", "b", "a"], ordered=True)
     >>> union_categoricals([a, b], ignore_order=True)
     [a, b, c, c, b, a]
-    Categories (3, object): [a, b, c]
+    Categories (3, object): ['a', 'b', 'c']
 
     `union_categoricals` also works with a `CategoricalIndex`, or `Series`
     containing categorical data, but note that the resulting array will

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -524,8 +524,8 @@ def values(self):
         array(['a', 'a', 'b', 'c'], dtype=object)
 
         >>> pd.Series(list('aabc')).astype('category').values
-        [a, a, b, c]
-        Categories (3, object): [a, b, c]
+        ['a', 'a', 'b', 'c']
+        Categories (3, object): ['a', 'b', 'c']
 
         Timezone aware datetime data is converted to UTC:
 
@@ -1850,15 +1850,15 @@ def unique(self):
         appearance.
 
         >>> pd.Series(pd.Categorical(list('baabc'))).unique()
-        [b, a, c]
-        Categories (3, object): [b, a, c]
+        ['b', 'a', 'c']
+        Categories (3, object): ['b', 'a', 'c']
 
         An ordered Categorical preserves the category ordering.
 
         >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
         ...                          ordered=True)).unique()
-        [b, a, c]
-        Categories (3, object): [a < b < c]
+        ['b', 'a', 'c']
+        Categories (3, object): ['a' < 'b' < 'c']
         """
         result = super().unique()
         return result

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -4,6 +4,7 @@
 """
 
 from contextlib import contextmanager
+from csv import QUOTE_NONE, QUOTE_NONNUMERIC
 from datetime import tzinfo
 import decimal
 from functools import partial
@@ -176,6 +177,7 @@ def __init__(
         self.na_rep = na_rep
         self.length = length
         self.footer = footer
+        self.quoting = QUOTE_NONNUMERIC
 
     def _get_footer(self) -> str:
         footer = ""
@@ -200,6 +202,7 @@ def _get_formatted_values(self) -> List[str]:
             None,
             float_format=None,
             na_rep=self.na_rep,
+            quoting=self.quoting,
         )
 
     def to_string(self) -> str:
@@ -1109,6 +1112,7 @@ def format_array(
     justify: str = "right",
     decimal: str = ".",
     leading_space: Optional[bool] = None,
+    quoting: Optional[int] = None,
 ) -> List[str]:
     """
     Format an array for printing.
@@ -1171,6 +1175,7 @@ def format_array(
         justify=justify,
         decimal=decimal,
         leading_space=leading_space,
+        quoting=quoting,
     )
 
     return fmt_obj.get_result()
@@ -1216,11 +1221,15 @@ def _format_strings(self) -> List[str]:
         else:
             float_format = self.float_format
 
-        formatter = (
-            self.formatter
-            if self.formatter is not None
-            else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n")))
-        )
+        if self.formatter is not None:
+            formatter = self.formatter
+        else:
+            quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
+            formatter = partial(
+                pprint_thing,
+                escape_chars=("\t", "\r", "\n"),
+                quote_strings=quote_strings,
+            )
 
         def _format(x):
             if self.na_rep is not None and is_scalar(x) and isna(x):

diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -14,7 +14,10 @@
 
 class TestCategoricalReprWithFactor(TestCategorical):
     def test_print(self):
-        expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"]
+        expected = [
+            "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
+            "Categories (3, object): ['a' < 'b' < 'c']",
+        ]
         expected = "\n".join(expected)
         actual = repr(self.factor)
         assert actual == expected
@@ -24,9 +27,9 @@ class TestCategoricalRepr:
     def test_big_print(self):
         factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True)
         expected = [
-            "[a, b, c, a, b, ..., b, c, a, b, c]",
+            "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
             "Length: 600",
-            "Categories (3, object): [a, b, c]",
+            "Categories (3, object): ['a', 'b', 'c']",
         ]
         expected = "\n".join(expected)
 
@@ -36,13 +39,13 @@ def test_big_print(self):
 
     def test_empty_print(self):
         factor = Categorical([], ["a", "b", "c"])
-        expected = "[], Categories (3, object): [a, b, c]"
+        expected = "[], Categories (3, object): ['a', 'b', 'c']"
         actual = repr(factor)
         assert actual == expected
 
         assert expected == actual
         factor = Categorical([], ["a", "b", "c"], ordered=True)
-        expected = "[], Categories (3, object): [a < b < c]"
+        expected = "[], Categories (3, object): ['a' < 'b' < 'c']"
         actual = repr(factor)
         assert expected == actual
 
@@ -64,17 +67,17 @@ def test_print_none_width(self):
     def test_unicode_print(self):
         c = Categorical(["aaaaa", "bb", "cccc"] * 20)
         expected = """\
-[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc]
+['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc']
 Length: 60
-Categories (3, object): [aaaaa, bb, cccc]"""
+Categories (3, object): ['aaaaa', 'bb', 'cccc']"""
 
         assert repr(c) == expected
 
         c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
         expected = """\
-[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
+['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
 Length: 60
-Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa
+Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa
 
         assert repr(c) == expected
 
@@ -83,9 +86,9 @@ def test_unicode_print(self):
         with option_context("display.unicode.east_asian_width", True):
 
             c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
-            expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
+            expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
 Length: 60
-Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa
+Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa
 
             assert repr(c) == expected
 
@@ -523,3 +526,9 @@ def test_categorical_index_repr_timedelta_ordered(self):
                  categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')"""  # noqa
 
         assert repr(i) == exp
+
+    def test_categorical_str_repr(self):
+        # GH 33676
+        result = repr(Categorical([1, "2", 3, 4]))
+        expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']"
+        assert result == expected
diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py
@@ -270,7 +270,7 @@ def test_categorical_repr(self):
             "0     a\n1     b\n"
             + "     ..\n"
             + "48    a\n49    b\n"
-            + "Length: 50, dtype: category\nCategories (2, object): [a, b]"
+            + "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
         )
         with option_context("display.max_rows", 5):
             assert exp == repr(a)
@@ -279,7 +279,7 @@ def test_categorical_repr(self):
         a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
         exp = (
             "0    a\n1    b\n" + "dtype: category\n"
-            "Categories (26, object): [a < b < c < d ... w < x < y < z]"
+            "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... 'w' < 'x' < 'y' < 'z']"
         )
         assert exp == a.__str__()
 

diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
@@ -185,10 +185,10 @@ def test_series_equal_categorical_values_mismatch(check_less_precise):
 
 Series values are different \\(66\\.66667 %\\)
 \\[index\\]: \\[0, 1, 2\\]
-\\[left\\]:  \\[a, b, c\\]
-Categories \\(3, object\\): \\[a, b, c\\]
-\\[right\\]: \\[a, c, b\\]
-Categories \\(3, object\\): \\[a, b, c\\]"""
+\\[left\\]:  \\['a', 'b', 'c'\\]
+Categories \\(3, object\\): \\['a', 'b', 'c'\\]
+\\[right\\]: \\['a', 'c', 'b'\\]
+Categories \\(3, object\\): \\['a', 'b', 'c'\\]"""
 
     s1 = Series(Categorical(["a", "b", "c"]))
     s2 = Series(Categorical(["a", "c", "b"]))