Skip to content

[WIP] style NA in reprs #30778

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions LICENSES/COLORAMA_LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

67 changes: 67 additions & 0 deletions pandas/core/_formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Constants for formatting. Usable by both pandas.core and elsewhere.

The names are chosen to match colorama/ansi.py, whose license is included
in LICENSES/COLORAMA_LICENSE
"""
import re
from typing import Any, Callable, List, Optional

from pandas._libs import missing as libmissing

# Control Sequence Introducer: the ESC character (octal 033) followed by "[".
# The Ansi* classes in this module prefix each of their codes with it.
CSI = "\033["
# Matches one escape sequence: ESC, one character in "@".."_" (which includes
# "["), any number of parameter characters ("0".."?"), any number of
# intermediate characters (" ".."/"), and one final character in "@".."~".
# Used by strip_ansi() to remove styling from a string.
ANSI_PAT = re.compile(r"\x1B[@-_][0-?]*[ -/]*[@-~]")


def strip_ansi(x):
    """Return ``x`` with every substring matched by ``ANSI_PAT`` removed."""
    cleaned = ANSI_PAT.sub("", x)
    return cleaned


def format_with(value: str, formatters: List[str]):
    """
    Wrap ``value`` in the given escape codes.

    Parameters
    ----------
    value : str
        The text to style.
    formatters : list of str
        Escape codes emitted, in order, before ``value``.

    Returns
    -------
    str
        The formatter codes, then ``value``, then ``AnsiStyle.RESET_ALL``,
        joined without separators.
    """
    # The trailing reset is always appended after the value.
    pieces = formatters + [value, AnsiStyle.RESET_ALL]
    return "".join(pieces)


class AnsiFore:
    """Foreground escape codes: ``CSI`` + a numeric code (30-37, 39) + ``m``."""

    BLACK = CSI + "30m"
    RED = CSI + "31m"
    GREEN = CSI + "32m"
    YELLOW = CSI + "33m"
    BLUE = CSI + "34m"
    MAGENTA = CSI + "35m"
    CYAN = CSI + "36m"
    WHITE = CSI + "37m"
    RESET = CSI + "39m"


class AnsiBack:
    """Background escape codes: ``CSI`` + a numeric code (40-47, 49) + ``m``."""

    BLACK = CSI + "40m"
    RED = CSI + "41m"
    GREEN = CSI + "42m"
    YELLOW = CSI + "43m"
    BLUE = CSI + "44m"
    MAGENTA = CSI + "45m"
    CYAN = CSI + "46m"
    WHITE = CSI + "47m"
    RESET = CSI + "49m"


class AnsiStyle:
    """Style escape codes: ``CSI`` + a numeric code (1, 2, 22, 0) + ``m``."""

    BRIGHT = CSI + "1m"
    DIM = CSI + "2m"
    NORMAL = CSI + "22m"
    # Appended by format_with() after every styled value.
    RESET_ALL = CSI + "0m"


class NAFormatterMixin:
    """Mixin providing a ``_formatter`` that can render ``NA`` in red."""

    def _formatter(
        self, boxed: bool = False, terminal=False
    ) -> Callable[[Any], Optional[str]]:
        """
        Build the scalar formatting function.

        Parameters
        ----------
        boxed : bool, default False
            If True, non-NA scalars are rendered with ``str``; otherwise
            with ``repr``.
        terminal : bool, default False
            If True, ``NA`` is rendered as "NA" wrapped in ``AnsiFore.RED``
            and a trailing reset via ``format_with``.

        Returns
        -------
        callable
            A one-argument function returning the rendered string.
        """
        # Both flags are fixed for the lifetime of the closure, so the
        # plain renderer can be chosen up front.
        render = str if boxed else repr

        def formatter(x):
            if terminal and x is libmissing.NA:
                return format_with("NA", [AnsiFore.RED])
            return render(x)

        return formatter
41 changes: 37 additions & 4 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
This is an experimental API and subject to breaking changes
without warning.
"""
import inspect
import operator
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union
import warnings

import numpy as np

Expand Down Expand Up @@ -910,20 +912,35 @@ def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]:
# ------------------------------------------------------------------------
# Printing
# ------------------------------------------------------------------------

def __repr__(self) -> str:
def _repr_base(self, is_repr=True):
from pandas.io.formats.printing import format_object_summary

# the short repr has no trailing newline, while the truncated
# repr does. So we include a newline in our template, and strip
# any trailing newlines from format_object_summary

if is_repr:
terminal = True
else:
terminal = False
# compatibility for older EAs
kwargs = _check_formatter_signature(self._formatter, terminal=terminal)

data = format_object_summary(
self, self._formatter(), indent_for_name=False
self, self._formatter(**kwargs), indent_for_name=False
).rstrip(", \n")
class_name = f"<{type(self).__name__}>\n"
return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"

def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]:
def __repr__(self) -> str:
return self._repr_base(is_repr=True)

def __str__(self):
return self._repr_base(is_repr=False)

def _formatter(
self, boxed: bool = False, terminal: bool = False
) -> Callable[[Any], Optional[str]]:
"""Formatting function for scalar values.

This is used in the default '__repr__'. The returned formatting
Expand All @@ -937,6 +954,12 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]:
itself (False). This may be useful if you want scalar values
to appear differently within a Series versus on its own (e.g.
quoted or not).
terminal : bool, default False
Indicator whether the result is being printed to a terminal
screen. This may be used to detect whether ANSI codes should
be used to style terminal output.

.. versionadded:: 1.0.0

Returns
-------
Expand Down Expand Up @@ -1203,3 +1226,13 @@ def _create_arithmetic_method(cls, op):
@classmethod
def _create_comparison_method(cls, op):
return cls._create_method(op, coerce_to_dtype=False)


def _check_formatter_signature(formatter, **kwargs):
    """
    Drop the ``terminal`` keyword when ``formatter`` cannot accept it.

    Compatibility shim for ``_formatter`` implementations written before the
    ``terminal`` parameter existed.

    Parameters
    ----------
    formatter : callable
        The callable (typically a bound ``_formatter`` method) that the
        returned keywords will be passed to.
    **kwargs
        Keyword arguments intended for ``formatter``.

    Returns
    -------
    dict
        ``kwargs`` unchanged when ``formatter`` declares a keyword-passable
        ``terminal`` parameter or a ``**kwargs`` catch-all; otherwise
        ``kwargs`` without the ``terminal`` entry, after emitting a warning.
    """
    parameters = inspect.signature(formatter).parameters
    keyword_kinds = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    # Look at what the signature actually accepts rather than counting
    # parameters: a count mislabels ``(**kwargs)`` / ``(terminal=False)`` as
    # outdated and lets zero- or two-parameter legacy signatures through,
    # which would then fail with TypeError on the unexpected keyword.
    accepts_terminal = any(
        param.kind is inspect.Parameter.VAR_KEYWORD
        or (name == "terminal" and param.kind in keyword_kinds)
        for name, param in parameters.items()
    )
    if not accepts_terminal:
        warnings.warn(
            "'_formatter' signature is incorrect. Ensure it matches "
            "the base class' signature."
        )
        kwargs.pop("terminal", None)
    return kwargs
6 changes: 2 additions & 4 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from pandas.core.dtypes.missing import isna, notna

from pandas.core import nanops, ops
from pandas.core._formats import NAFormatterMixin
from pandas.core.algorithms import take
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
import pandas.core.common as com
Expand Down Expand Up @@ -197,7 +198,7 @@ def coerce_to_array(values, mask=None, copy: bool = False):
return values, mask


class BooleanArray(ExtensionArray, ExtensionOpsMixin):
class BooleanArray(NAFormatterMixin, ExtensionArray, ExtensionOpsMixin):
"""
Array of boolean (True/False) data with missing values.

Expand Down Expand Up @@ -295,9 +296,6 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
def _from_factorized(cls, values, original: "BooleanArray"):
return cls._from_sequence(values, dtype=original.dtype)

def _formatter(self, boxed=False):
return str

@property
def _hasna(self) -> bool:
# Note: this is expensive right now! The hope is that we can
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ def _constructor(self) -> Type["Categorical"]:
def _from_sequence(cls, scalars, dtype=None, copy=False):
return Categorical(scalars, dtype=dtype)

def _formatter(self, boxed=False):
def _formatter(self, boxed=False, terminal=False):
# Defer to CategoricalFormatter's formatter.
return None

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None):
"""
raise AbstractMethodError(self)

def _formatter(self, boxed=False):
def _formatter(self, boxed=False, terminal=False):
# TODO: Remove Datetime & DatetimeTZ formatters.
return "'{}'".format

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pandas.core.dtypes.missing import isna, notna

from pandas.core import nanops, ops
from pandas.core._formats import NAFormatterMixin
from pandas.core.algorithms import take
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
import pandas.core.common as com
Expand Down Expand Up @@ -259,7 +260,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
return values, mask


class IntegerArray(ExtensionArray, ExtensionOpsMixin):
class IntegerArray(NAFormatterMixin, ExtensionArray, ExtensionOpsMixin):
"""
Array of integer (optional missing) values.

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ def asfreq(self, freq=None, how="E"):
# ------------------------------------------------------------------
# Rendering Methods

def _formatter(self, boxed=False):
def _formatter(self, boxed=False, terminal=False):
if boxed:
return str
return "'{}'".format
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1471,7 +1471,7 @@ def __repr__(self) -> str:
pp_index = printing.pprint_thing(self.sp_index)
return f"{pp_str}\nFill: {pp_fill}\n{pp_index}"

def _formatter(self, boxed=False):
def _formatter(self, boxed=False, terminal=False):
# Defer to the formatter from the GenericArrayFormatter calling us.
# This will infer the correct formatter from the dtype of the values.
return None
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from pandas import compat
from pandas.core import ops
from pandas.core._formats import NAFormatterMixin
from pandas.core.arrays import PandasArray
from pandas.core.construction import extract_array
from pandas.core.missing import isna
Expand Down Expand Up @@ -82,7 +83,7 @@ def __from_arrow__(self, array):
return StringArray._concat_same_type(results)


class StringArray(PandasArray):
class StringArray(NAFormatterMixin, PandasArray):
"""
Extension array for string data.

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def median(
# ----------------------------------------------------------------
# Rendering Methods

def _formatter(self, boxed=False):
def _formatter(self, boxed=False, terminal=False):
from pandas.io.formats.format import _get_format_timedelta64

return _get_format_timedelta64(self, box=True)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ def _info_repr(self) -> bool:
self._repr_fits_horizontal_() and self._repr_fits_vertical_()
)

def __repr__(self) -> str:
def _repr_base(self, is_repr: bool = True) -> str:
"""
Return a string representation for a particular DataFrame.
"""
Expand All @@ -681,10 +681,17 @@ def __repr__(self) -> str:
line_width=width,
max_colwidth=max_colwidth,
show_dimensions=show_dimensions,
terminal=is_repr,
)

return buf.getvalue()

def __repr__(self) -> str:
return self._repr_base(is_repr=True)

def __str__(self) -> str:
return self._repr_base(is_repr=False)

def _repr_html_(self) -> Optional[str]:
"""
Return a html representation for a particular DataFrame.
Expand Down Expand Up @@ -761,6 +768,7 @@ def to_string(
line_width: Optional[int] = None,
max_colwidth: Optional[int] = None,
encoding: Optional[str] = None,
terminal: bool = False,
) -> Optional[str]:
"""
Render a DataFrame to a console-friendly tabular output.
Expand Down Expand Up @@ -812,6 +820,7 @@ def to_string(
show_dimensions=show_dimensions,
decimal=decimal,
line_width=line_width,
terminal=terminal,
)
return formatter.to_string(buf=buf, encoding=encoding)

Expand Down
12 changes: 10 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,8 +1292,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False):

# ----------------------------------------------------------------------
# Rendering Methods

def __repr__(self) -> str:
def _repr_base(self, is_repr: bool = True):
"""
Return a string representation for a particular Series.
"""
Expand All @@ -1318,11 +1317,18 @@ def __repr__(self) -> str:
min_rows=min_rows,
max_rows=max_rows,
length=show_dimensions,
terminal=is_repr,
)
result = buf.getvalue()

return result

def __repr__(self) -> str:
return self._repr_base(is_repr=True)

def __str__(self) -> str:
return self._repr_base(is_repr=False)

def to_string(
self,
buf=None,
Expand All @@ -1335,6 +1341,7 @@ def to_string(
name=False,
max_rows=None,
min_rows=None,
terminal: bool = False,
):
"""
Render a string representation of the Series.
Expand Down Expand Up @@ -1382,6 +1389,7 @@ def to_string(
float_format=float_format,
min_rows=min_rows,
max_rows=max_rows,
terminal=terminal,
)
result = formatter.to_string()

Expand Down
Loading