-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG: repr of Categorical does not distinguish int and str. #34222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
aaf4e42
1f90a62
5a93a67
4cef20c
a3c6eda
9277d38
b978bf9
3215936
e6ce96f
197038b
f594fa1
4912ec3
79dd24b
d57ae96
aa62a24
9562313
457abe3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from csv import QUOTE_NONNUMERIC | ||
import operator | ||
from shutil import get_terminal_size | ||
from typing import Dict, Hashable, List, Type, Union, cast | ||
|
@@ -1874,11 +1875,17 @@ def _repr_categories(self): | |
|
||
if len(self.categories) > max_categories: | ||
num = max_categories // 2 | ||
head = fmt.format_array(self.categories[:num], None) | ||
tail = fmt.format_array(self.categories[-num:], None) | ||
head = fmt.format_array( | ||
self.categories[:num], None, quoting=QUOTE_NONNUMERIC | ||
) | ||
tail = fmt.format_array( | ||
self.categories[-num:], None, quoting=QUOTE_NONNUMERIC | ||
) | ||
category_strs = head + ["..."] + tail | ||
else: | ||
category_strs = fmt.format_array(self.categories, None) | ||
category_strs = fmt.format_array( | ||
self.categories, None, quoting=QUOTE_NONNUMERIC | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe use `functools.partial` here to avoid duplicating the `quoting=QUOTE_NONNUMERIC` argument in each `fmt.format_array` call. |
||
|
||
# Strip all leading spaces, which format_array adds for columns... | ||
category_strs = [x.strip() for x in category_strs] | ||
|
@@ -1921,7 +1928,7 @@ def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str: | |
from pandas.io.formats import format as fmt | ||
|
||
formatter = fmt.CategoricalFormatter( | ||
self, length=length, na_rep=na_rep, footer=footer | ||
self, length=length, na_rep=na_rep, footer=footer, quoting=QUOTE_NONNUMERIC | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we don't need to pass this through here, since CategoricalFormatter should always use QUOTE_NONNUMERIC. |
||
) | ||
result = formatter.to_string() | ||
return str(result) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
""" | ||
|
||
from contextlib import contextmanager | ||
from csv import QUOTE_NONE | ||
from datetime import tzinfo | ||
import decimal | ||
from functools import partial | ||
|
@@ -170,12 +171,14 @@ def __init__( | |
length: bool = True, | ||
na_rep: str = "NaN", | ||
footer: bool = True, | ||
quoting: Optional[int] = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe redundant: CategoricalFormatter should always use QUOTE_NONNUMERIC, so the `quoting` parameter may not be needed. |
||
): | ||
self.categorical = categorical | ||
self.buf = buf if buf is not None else StringIO("") | ||
self.na_rep = na_rep | ||
self.length = length | ||
self.footer = footer | ||
self.quoting = quoting | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same: storing `self.quoting` may be redundant if the quoting is always QUOTE_NONNUMERIC. |
||
|
||
def _get_footer(self) -> str: | ||
footer = "" | ||
|
@@ -200,6 +203,7 @@ def _get_formatted_values(self) -> List[str]: | |
None, | ||
float_format=None, | ||
na_rep=self.na_rep, | ||
quoting=self.quoting, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The quoting here is always QUOTE_NONNUMERIC. |
||
) | ||
|
||
def to_string(self) -> str: | ||
|
@@ -1109,6 +1113,7 @@ def format_array( | |
justify: str = "right", | ||
decimal: str = ".", | ||
leading_space: Optional[bool] = None, | ||
quoting: Optional[int] = None, | ||
) -> List[str]: | ||
""" | ||
Format an array for printing. | ||
|
@@ -1171,6 +1176,7 @@ def format_array( | |
justify=justify, | ||
decimal=decimal, | ||
leading_space=leading_space, | ||
quoting=quoting, | ||
) | ||
|
||
return fmt_obj.get_result() | ||
|
@@ -1216,10 +1222,15 @@ def _format_strings(self) -> List[str]: | |
else: | ||
float_format = self.float_format | ||
|
||
quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE | ||
formatter = ( | ||
self.formatter | ||
if self.formatter is not None | ||
else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n"))) | ||
else ( | ||
lambda x: pprint_thing( | ||
x, escape_chars=("\t", "\r", "\n"), quote_strings=quote_strings | ||
) | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As the ternary expression is getting more complex, an if/else statement may now be more readable. Maybe use `functools.partial` instead of the lambda, and maybe move the `quote_strings` assignment inside the relevant if/else branch. |
||
) | ||
|
||
def _format(x): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is probably more appropriate for the Categorical section, to be consistent with the similar change for Sparse; see #34352. (NOTE: 1.1 does not yet have a Categorical section.)
Also, I think it is safe to say this was a bug, since the repr of the categories in the dtype repr already included the quotes; see #34352 (comment).