pandas-dev · jreback · Jun 24, 2020 · May 17, 2020 · May 17, 2020 · May 21, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -997,6 +997,7 @@ I/O
 - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
 - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. (:issue:`31544`)
 - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
+- Bug in the repr of :class:`Categorical` where string values and categories were shown without quotes, making them indistinguishable from integers (e.g. ``1`` vs ``'1'``) (:issue:`33676`)
 
 Plotting
 ^^^^^^^^

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1,3 +1,4 @@
+from csv import QUOTE_NONNUMERIC
 import operator
 from shutil import get_terminal_size
 from typing import Dict, Hashable, List, Type, Union, cast
@@ -1874,11 +1875,17 @@ def _repr_categories(self):
 
         if len(self.categories) > max_categories:
             num = max_categories // 2
-            head = fmt.format_array(self.categories[:num], None)
-            tail = fmt.format_array(self.categories[-num:], None)
+            head = fmt.format_array(
+                self.categories[:num], None, quoting=QUOTE_NONNUMERIC
+            )
+            tail = fmt.format_array(
+                self.categories[-num:], None, quoting=QUOTE_NONNUMERIC
+            )
             category_strs = head + ["..."] + tail
         else:
-            category_strs = fmt.format_array(self.categories, None)
+            category_strs = fmt.format_array(
+                self.categories, None, quoting=QUOTE_NONNUMERIC
+            )
 
         # Strip all leading spaces, which format_array adds for columns...
         category_strs = [x.strip() for x in category_strs]
@@ -1921,7 +1928,7 @@ def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str:
         from pandas.io.formats import format as fmt
 
         formatter = fmt.CategoricalFormatter(
-            self, length=length, na_rep=na_rep, footer=footer
+            self, length=length, na_rep=na_rep, footer=footer, quoting=QUOTE_NONNUMERIC
         )
         result = formatter.to_string()
         return str(result)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -743,8 +743,8 @@ def array(self) -> ExtensionArray:
 
         >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
         >>> ser.array
-        [a, b, a]
-        Categories (2, object): [a, b]
+        ['a', 'b', 'a']
+        Categories (2, object): ['a', 'b']
         """
         raise AbstractMethodError(self)
 

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -4,6 +4,7 @@
 """
 
 from contextlib import contextmanager
+from csv import QUOTE_NONE
 from datetime import tzinfo
 import decimal
 from functools import partial
@@ -170,12 +171,14 @@ def __init__(
         length: bool = True,
         na_rep: str = "NaN",
         footer: bool = True,
+        quoting: Optional[int] = None,
     ):
         self.categorical = categorical
         self.buf = buf if buf is not None else StringIO("")
         self.na_rep = na_rep
         self.length = length
         self.footer = footer
+        self.quoting = quoting
 
     def _get_footer(self) -> str:
         footer = ""
@@ -200,6 +203,7 @@ def _get_formatted_values(self) -> List[str]:
             None,
             float_format=None,
             na_rep=self.na_rep,
+            quoting=self.quoting,
         )
 
     def to_string(self) -> str:
@@ -1109,6 +1113,7 @@ def format_array(
     justify: str = "right",
     decimal: str = ".",
     leading_space: Optional[bool] = None,
+    quoting: Optional[int] = None,
 ) -> List[str]:
     """
     Format an array for printing.
@@ -1171,6 +1176,7 @@ def format_array(
         justify=justify,
         decimal=decimal,
         leading_space=leading_space,
+        quoting=quoting,
     )
 
     return fmt_obj.get_result()
@@ -1216,10 +1222,15 @@ def _format_strings(self) -> List[str]:
         else:
             float_format = self.float_format
 
+        quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
         formatter = (
             self.formatter
             if self.formatter is not None
-            else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n")))
+            else (
+                lambda x: pprint_thing(
+                    x, escape_chars=("\t", "\r", "\n"), quote_strings=quote_strings
+                )
+            )
         )
 
         def _format(x):

diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -14,7 +14,10 @@
 
 class TestCategoricalReprWithFactor(TestCategorical):
     def test_print(self):
-        expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"]
+        expected = [
+            "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
+            "Categories (3, object): ['a' < 'b' < 'c']",
+        ]
         expected = "\n".join(expected)
         actual = repr(self.factor)
         assert actual == expected
@@ -24,9 +27,9 @@ class TestCategoricalRepr:
     def test_big_print(self):
         factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True)
         expected = [
-            "[a, b, c, a, b, ..., b, c, a, b, c]",
+            "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
             "Length: 600",
-            "Categories (3, object): [a, b, c]",
+            "Categories (3, object): ['a', 'b', 'c']",
         ]
         expected = "\n".join(expected)
 
@@ -36,13 +39,13 @@ def test_big_print(self):
 
     def test_empty_print(self):
         factor = Categorical([], ["a", "b", "c"])
-        expected = "[], Categories (3, object): [a, b, c]"
+        expected = "[], Categories (3, object): ['a', 'b', 'c']"
         actual = repr(factor)
         assert actual == expected
 
         assert expected == actual
         factor = Categorical([], ["a", "b", "c"], ordered=True)
-        expected = "[], Categories (3, object): [a < b < c]"
+        expected = "[], Categories (3, object): ['a' < 'b' < 'c']"
         actual = repr(factor)
         assert expected == actual
 
@@ -64,17 +67,17 @@ def test_print_none_width(self):
     def test_unicode_print(self):
         c = Categorical(["aaaaa", "bb", "cccc"] * 20)
         expected = """\
-[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc]
+['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc']
 Length: 60
-Categories (3, object): [aaaaa, bb, cccc]"""
+Categories (3, object): ['aaaaa', 'bb', 'cccc']"""
 
         assert repr(c) == expected
 
         c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
         expected = """\
-[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
+['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
 Length: 60
-Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa
+Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa
 
         assert repr(c) == expected
 
@@ -83,9 +86,9 @@ def test_unicode_print(self):
         with option_context("display.unicode.east_asian_width", True):
 
             c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
-            expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
+            expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
 Length: 60
-Categories (3, object): [ああああ, いいいいい, ううううううう]"""  # noqa
+Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']"""  # noqa
 
             assert repr(c) == expected
 
@@ -523,3 +526,9 @@ def test_categorical_index_repr_timedelta_ordered(self):
                  categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')"""  # noqa
 
         assert repr(i) == exp
+
+    def test_categorical_str_repr(self):
+        # GH 33676: str values are quoted in the repr so they differ from ints
+        result = repr(Categorical([1, "2", 3, 4]))
+        expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']"
+        assert result == expected
diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py
@@ -270,7 +270,7 @@ def test_categorical_repr(self):
             "0     a\n1     b\n"
             + "     ..\n"
             + "48    a\n49    b\n"
-            + "Length: 50, dtype: category\nCategories (2, object): [a, b]"
+            + "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
         )
         with option_context("display.max_rows", 5):
             assert exp == repr(a)
@@ -279,7 +279,7 @@ def test_categorical_repr(self):
         a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
         exp = (
             "0    a\n1    b\n" + "dtype: category\n"
-            "Categories (26, object): [a < b < c < d ... w < x < y < z]"
+            "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... 'w' < 'x' < 'y' < 'z']"
         )
         assert exp == a.__str__()
 

diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
@@ -185,10 +185,10 @@ def test_series_equal_categorical_values_mismatch(check_less_precise):
 
 Series values are different \\(66\\.66667 %\\)
 \\[index\\]: \\[0, 1, 2\\]
-\\[left\\]:  \\[a, b, c\\]
-Categories \\(3, object\\): \\[a, b, c\\]
-\\[right\\]: \\[a, c, b\\]
-Categories \\(3, object\\): \\[a, b, c\\]"""
+\\[left\\]:  \\['a', 'b', 'c'\\]
+Categories \\(3, object\\): \\['a', 'b', 'c'\\]
+\\[right\\]: \\['a', 'c', 'b'\\]
+Categories \\(3, object\\): \\['a', 'b', 'c'\\]"""
 
     s1 = Series(Categorical(["a", "b", "c"]))
     s2 = Series(Categorical(["a", "c", "b"]))