Skip to content

REF: simplify Categorical.__repr__ #55391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2147,21 +2147,6 @@ def _formatter(self, boxed: bool = False):
# Defer to CategoricalFormatter's formatter.
return None

def _tidy_repr(self, max_vals: int = 10, footer: bool = True) -> str:
"""
a short repr displaying only max_vals and an optional (but default
footer)
"""
num = max_vals // 2
head = self[:num]._get_repr(length=False, footer=False)
tail = self[-(max_vals - num) :]._get_repr(length=False, footer=False)

result = f"{head[:-1]}, ..., {tail[1:]}"
if footer:
result = f"{result}\n{self._repr_footer()}"

return str(result)

def _repr_categories(self) -> list[str]:
"""
return the base repr for the categories
Expand Down Expand Up @@ -2217,33 +2202,49 @@ def _repr_categories_info(self) -> str:
# Replace " < ... < " with " ... " to save space
return f"{levheader}[{levstring.replace(' < ... < ', ' ... ')}]"

def _repr_footer(self) -> str:
info = self._repr_categories_info()
return f"Length: {len(self)}\n{info}"

def _get_repr(
self, length: bool = True, na_rep: str = "NaN", footer: bool = True
) -> str:
def _get_values_repr(self) -> str:
from pandas.io.formats import format as fmt

formatter = fmt.CategoricalFormatter(
self, length=length, na_rep=na_rep, footer=footer
assert len(self) > 0

vals = self._internal_get_values()
fmt_values = fmt.format_array(
vals,
None,
float_format=None,
na_rep="NaN",
quoting=QUOTE_NONNUMERIC,
)
result = formatter.to_string()
return str(result)

fmt_values = [i.strip() for i in fmt_values]
joined = ", ".join(fmt_values)
result = "[" + joined + "]"
return result

def __repr__(self) -> str:
"""
String representation.
"""
_maxlen = 10
if len(self._codes) > _maxlen:
result = self._tidy_repr(_maxlen)
elif len(self._codes) > 0:
result = self._get_repr(length=len(self) > _maxlen)
footer = self._repr_categories_info()
length = len(self)
max_len = 10
if length > max_len:
# In long cases we do not display all entries, so we add Length
# information to the __repr__.
num = max_len // 2
head = self[:num]._get_values_repr()
tail = self[-(max_len - num) :]._get_values_repr()
body = f"{head[:-1]}, ..., {tail[1:]}"
length_info = f"Length: {len(self)}"
result = f"{body}\n{length_info}\n{footer}"
elif length > 0:
body = self._get_values_repr()
result = f"{body}\n{footer}"
else:
msg = self._get_repr(length=False, footer=True).replace("\n", ", ")
result = f"[], {msg}"
# In the empty case we use a comma instead of newline to get
# a more compact __repr__
body = "[]"
result = f"{body}, {footer}"

return result

Expand Down
69 changes: 1 addition & 68 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
Sequence,
)
from contextlib import contextmanager
from csv import (
QUOTE_NONE,
QUOTE_NONNUMERIC,
)
from csv import QUOTE_NONE
from decimal import Decimal
from functools import partial
from io import StringIO
Expand Down Expand Up @@ -198,70 +195,6 @@
"""


class CategoricalFormatter:
    """
    Build the string representation of a Categorical.

    Parameters
    ----------
    categorical : Categorical
        The array to format.
    buf : IO[str] or None, default None
        Stored on the instance (a fresh ``StringIO`` when None); none of the
        methods of this class write to it.
    length : bool, default True
        Whether the footer starts with a ``Length: N`` line.
    na_rep : str, default "NaN"
        Representation passed to ``format_array`` for missing values.
    footer : bool, default True
        Whether ``to_string`` appends the footer at all.
    """

    def __init__(
        self,
        categorical: Categorical,
        buf: IO[str] | None = None,
        length: bool = True,
        na_rep: str = "NaN",
        footer: bool = True,
    ) -> None:
        self.categorical = categorical
        self.buf = buf if buf is not None else StringIO("")
        self.na_rep = na_rep
        self.length = length
        self.footer = footer
        self.quoting = QUOTE_NONNUMERIC

    def _get_footer(self) -> str:
        """
        Return the footer: an optional ``Length: N`` line followed by the
        categories summary on its own line.
        """
        level_info = self.categorical._repr_categories_info()
        if self.length:
            # Levels are added in a newline
            return f"Length: {len(self.categorical)}\n{level_info}"
        return level_info

    def _get_formatted_values(self) -> list[str]:
        """
        Return the values of the categorical formatted as strings.
        """
        return format_array(
            self.categorical._internal_get_values(),
            None,
            float_format=None,
            na_rep=self.na_rep,
            quoting=self.quoting,
        )

    def to_string(self) -> str:
        """
        Return ``[v1, v2, ...]`` followed, when requested, by the footer.

        An empty categorical produces only the footer, or an empty string
        when the footer is disabled.
        """
        # Explicit length check: the categorical is array-like, so its
        # truthiness is not used here.
        if len(self.categorical) == 0:
            return self._get_footer() if self.footer else ""

        values = ", ".join(v.strip() for v in self._get_formatted_values())
        lines = [f"[{values}]"]
        if self.footer:
            footer = self._get_footer()
            if footer:
                lines.append(footer)

        return "\n".join(lines)


class SeriesFormatter:
def __init__(
self,
Expand Down