From c6d593f97e98b648a5d4416da7ef4207fd711860 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 30 Dec 2019 13:38:27 -0600 Subject: [PATCH] wip --- LICENSES/COLORAMA_LICENSE | 1 + pandas/core/_formats.py | 67 ++++++++++++++++++++++ pandas/core/arrays/base.py | 41 +++++++++++-- pandas/core/arrays/boolean.py | 6 +- pandas/core/arrays/categorical.py | 2 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/integer.py | 3 +- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/sparse/array.py | 2 +- pandas/core/arrays/string_.py | 3 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/frame.py | 11 +++- pandas/core/series.py | 12 +++- pandas/io/formats/format.py | 33 ++++++++--- pandas/io/formats/printing.py | 44 ++++++++++++-- pandas/tests/arrays/string_/test_string.py | 12 ++++ pandas/tests/arrays/test_boolean.py | 63 ++++++++++++++++++++ pandas/tests/arrays/test_integer.py | 42 +++++++++++--- pandas/tests/extension/decimal/array.py | 2 +- pandas/tests/extension/test_compat.py | 22 +++++++ pandas/tests/io/formats/test_printing.py | 16 ++++++ 21 files changed, 349 insertions(+), 39 deletions(-) create mode 100644 LICENSES/COLORAMA_LICENSE create mode 100644 pandas/core/_formats.py create mode 100644 pandas/tests/extension/test_compat.py diff --git a/LICENSES/COLORAMA_LICENSE b/LICENSES/COLORAMA_LICENSE new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/LICENSES/COLORAMA_LICENSE @@ -0,0 +1 @@ + diff --git a/pandas/core/_formats.py b/pandas/core/_formats.py new file mode 100644 index 0000000000000..766185b673107 --- /dev/null +++ b/pandas/core/_formats.py @@ -0,0 +1,67 @@ +""" +Constants for formatting. Usable by both pandas.core and elsewhere. + +The names are chosen to match colorama/ansi.py, whose license is included +in LICENSES/COLORAMA_LICENSE +""" +import re +from typing import Any, Callable, List, Optional + +from pandas._libs import missing as libmissing + +CSI = "\033[" +ANSI_PAT = re.compile(r"\x1B[@-_][0-?]*[ -/]*[@-~]") + + +def strip_ansi(x): + return ANSI_PAT.sub("", x) + + +def format_with(value: str, formatters: List[str]): + return "".join(formatters + [value, AnsiStyle.RESET_ALL]) + + +class AnsiFore: + BLACK = f"{CSI}30m" + RED = f"{CSI}31m" + GREEN = f"{CSI}32m" + YELLOW = f"{CSI}33m" + BLUE = f"{CSI}34m" + MAGENTA = f"{CSI}35m" + CYAN = f"{CSI}36m" + WHITE = f"{CSI}37m" + RESET = f"{CSI}39m" + + +class AnsiBack: + BLACK = f"{CSI}40m" + RED = f"{CSI}41m" + GREEN = f"{CSI}42m" + YELLOW = f"{CSI}43m" + BLUE = f"{CSI}44m" + MAGENTA = f"{CSI}45m" + CYAN = f"{CSI}46m" + WHITE = f"{CSI}47m" + RESET = f"{CSI}49m" + + +class AnsiStyle: + BRIGHT = f"{CSI}1m" + DIM = f"{CSI}2m" + NORMAL = f"{CSI}22m" + RESET_ALL = f"{CSI}0m" + + +class NAFormatterMixin: + def _formatter( + self, boxed: bool = False, terminal=False + ) -> Callable[[Any], Optional[str]]: + def formatter(x): + if x is libmissing.NA and terminal: + return format_with("NA", [AnsiFore.RED]) + elif boxed: + return str(x) + else: + return repr(x) + + return formatter diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 96a4eb1b3bf32..9fe4512209892 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -5,8 +5,10 @@ This is an experimental API and subject to breaking changes without warning. """ +import inspect import operator from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union +import warnings import numpy as np @@ -910,20 +912,35 @@ def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]: # ------------------------------------------------------------------------ # Printing # ------------------------------------------------------------------------ - - def __repr__(self) -> str: + def _repr_base(self, is_repr=True): from pandas.io.formats.printing import format_object_summary # the short repr has no trailing newline, while the truncated # repr does. So we include a newline in our template, and strip # any trailing newlines from format_object_summary + + if is_repr: + terminal = True + else: + terminal = False + # compatibility for older EAs + kwargs = _check_formatter_signature(self._formatter, terminal=terminal) + data = format_object_summary( - self, self._formatter(), indent_for_name=False + self, self._formatter(**kwargs), indent_for_name=False ).rstrip(", \n") class_name = f"<{type(self).__name__}>\n" return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" - def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: + def __repr__(self) -> str: + return self._repr_base(is_repr=True) + + def __str__(self): + return self._repr_base(is_repr=False) + + def _formatter( + self, boxed: bool = False, terminal: bool = False + ) -> Callable[[Any], Optional[str]]: """Formatting function for scalar values. This is used in the default '__repr__'. The returned formatting @@ -937,6 +954,12 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: itself (False). This may be useful if you want scalar values to appear differently within a Series versus on its own (e.g. quoted or not). + terminal : bool, default False + Indicator whether the result is being printed to a terminal + screen. This may be used to detect whether ANSI codes should + be used to style terminal output. + + .. versionadded:: 1.0.0 Returns ------- @@ -1203,3 +1226,13 @@ def _create_arithmetic_method(cls, op): @classmethod def _create_comparison_method(cls, op): return cls._create_method(op, coerce_to_dtype=False) + + +def _check_formatter_signature(formatter, **kwargs): + if len(inspect.signature(formatter).parameters) == 1: + warnings.warn( + "'_formatter' signature is incorrect. Ensure it matches " + "the base class' signature." + ) + kwargs.pop("terminal", None) + return kwargs diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 409be244c4327..3c965592da1c3 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -27,6 +27,7 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core import nanops, ops +from pandas.core._formats import NAFormatterMixin from pandas.core.algorithms import take from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com @@ -197,7 +198,7 @@ def coerce_to_array(values, mask=None, copy: bool = False): return values, mask -class BooleanArray(ExtensionArray, ExtensionOpsMixin): +class BooleanArray(NAFormatterMixin, ExtensionArray, ExtensionOpsMixin): """ Array of boolean (True/False) data with missing values. @@ -295,9 +296,6 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: def _from_factorized(cls, values, original: "BooleanArray"): return cls._from_sequence(values, dtype=original.dtype) - def _formatter(self, boxed=False): - return str - @property def _hasna(self) -> bool: # Note: this is expensive right now! The hope is that we can diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 136c7fa32a6e7..0c7ec8b6271ba 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -459,7 +459,7 @@ def _constructor(self) -> Type["Categorical"]: def _from_sequence(cls, scalars, dtype=None, copy=False): return Categorical(scalars, dtype=dtype) - def _formatter(self, boxed=False): + def _formatter(self, boxed=False, terminal=False): # Defer to CategoricalFormatter's formatter. return None diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2bdd9acaeb70f..af3c7b6026003 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -389,7 +389,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None): """ raise AbstractMethodError(self) - def _formatter(self, boxed=False): + def _formatter(self, boxed=False, terminal=False): # TODO: Remove Datetime & DatetimeTZ formatters. return "'{}'".format diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 0922f4ac6f71d..3378735f2f1e0 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -24,6 +24,7 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core import nanops, ops +from pandas.core._formats import NAFormatterMixin from pandas.core.algorithms import take from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin import pandas.core.common as com @@ -259,7 +260,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): return values, mask -class IntegerArray(ExtensionArray, ExtensionOpsMixin): +class IntegerArray(NAFormatterMixin, ExtensionArray, ExtensionOpsMixin): """ Array of integer (optional missing) values. diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 6b9c7f4e1eb38..6f3f2669c9598 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -615,7 +615,7 @@ def asfreq(self, freq=None, how="E"): # ------------------------------------------------------------------ # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed=False, terminal=False): if boxed: return str return "'{}'".format diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 9838cdfabbb95..ff29cd5873376 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1471,7 +1471,7 @@ def __repr__(self) -> str: pp_index = printing.pprint_thing(self.sp_index) return f"{pp_str}\nFill: {pp_fill}\n{pp_index}" - def _formatter(self, boxed=False): + def _formatter(self, boxed=False, terminal=False): # Defer to the formatter from the GenericArrayFormatter calling us. # This will infer the correct formatter from the dtype of the values. return None diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 0da877fb1ad45..d494721764811 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -13,6 +13,7 @@ from pandas import compat from pandas.core import ops +from pandas.core._formats import NAFormatterMixin from pandas.core.arrays import PandasArray from pandas.core.construction import extract_array from pandas.core.missing import isna @@ -82,7 +83,7 @@ def __from_arrow__(self, array): return StringArray._concat_same_type(results) -class StringArray(PandasArray): +class StringArray(NAFormatterMixin, PandasArray): """ Extension array for string data. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1874517f0f2e4..422c4251fa967 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -457,7 +457,7 @@ def median( # ---------------------------------------------------------------- # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed=False, terminal=False): from pandas.io.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ba0c0e7d66b1d..e6e87e507bb93 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -655,7 +655,7 @@ def _info_repr(self) -> bool: self._repr_fits_horizontal_() and self._repr_fits_vertical_() ) - def __repr__(self) -> str: + def _repr_base(self, is_repr: bool = True) -> str: """ Return a string representation for a particular DataFrame. """ @@ -681,10 +681,17 @@ def __repr__(self) -> str: line_width=width, max_colwidth=max_colwidth, show_dimensions=show_dimensions, + terminal=is_repr, ) return buf.getvalue() + def __repr__(self) -> str: + return self._repr_base(is_repr=True) + + def __str__(self) -> str: + return self._repr_base(is_repr=False) + def _repr_html_(self) -> Optional[str]: """ Return a html representation for a particular DataFrame. @@ -761,6 +768,7 @@ def to_string( line_width: Optional[int] = None, max_colwidth: Optional[int] = None, encoding: Optional[str] = None, + terminal: bool = False, ) -> Optional[str]: """ Render a DataFrame to a console-friendly tabular output. @@ -812,6 +820,7 @@ def to_string( show_dimensions=show_dimensions, decimal=decimal, line_width=line_width, + terminal=terminal, ) return formatter.to_string(buf=buf, encoding=encoding) diff --git a/pandas/core/series.py b/pandas/core/series.py index b81659920cfe8..8bc07857ab4f7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1292,8 +1292,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): # ---------------------------------------------------------------------- # Rendering Methods - - def __repr__(self) -> str: + def _repr_base(self, is_repr: bool = True): """ Return a string representation for a particular Series. """ @@ -1318,11 +1317,18 @@ def __repr__(self) -> str: min_rows=min_rows, max_rows=max_rows, length=show_dimensions, + terminal=is_repr, ) result = buf.getvalue() return result + def __repr__(self) -> str: + return self._repr_base(is_repr=True) + + def __str__(self) -> str: + return self._repr_base(is_repr=False) + def to_string( self, buf=None, @@ -1335,6 +1341,7 @@ def to_string( name=False, max_rows=None, min_rows=None, + terminal: bool = False, ): """ Render a string representation of the Series. @@ -1382,6 +1389,7 @@ def to_string( float_format=float_format, min_rows=min_rows, max_rows=max_rows, + terminal=terminal, ) result = formatter.to_string() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3020ac421fc2f..9886d4c8ebd35 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -64,6 +64,8 @@ ) from pandas.core.dtypes.missing import isna, notna +from pandas.core._formats import strip_ansi +from pandas.core.arrays.base import _check_formatter_signature from pandas.core.arrays.datetimes import DatetimeArray from pandas.core.arrays.timedeltas import TimedeltaArray from pandas.core.base import PandasObject @@ -240,6 +242,7 @@ def __init__( dtype: bool = True, max_rows: Optional[int] = None, min_rows: Optional[int] = None, + terminal: bool = False, ): self.series = series self.buf = buf if buf is not None else StringIO() @@ -256,6 +259,7 @@ def __init__( self.float_format = float_format self.dtype = dtype self.adj = _get_adjustment() + self.terminal = terminal self._chk_truncate() @@ -346,6 +350,7 @@ def _get_formatted_values(self) -> List[str]: None, float_format=self.float_format, na_rep=self.na_rep, + terminal=self.terminal, ) def to_string(self) -> str: @@ -394,7 +399,7 @@ def __init__(self): self.encoding = get_option("display.encoding") def len(self, text: str) -> int: - return len(text) + return len(strip_ansi(text)) def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]: return justify(texts, max_len, mode=mode) @@ -562,6 +567,7 @@ def __init__( render_links: bool = False, bold_rows: bool = False, escape: bool = True, + terminal: bool = False, ): self.frame = frame self.show_index_names = index_names @@ -596,6 +602,7 @@ def __init__( self.show_dimensions = show_dimensions self.table_id = table_id self.render_links = render_links + self.terminal = terminal if justify is None: self.justify = get_option("display.colheader_justify") @@ -954,6 +961,7 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space, decimal=self.decimal, + terminal=self.terminal, ) def to_html( @@ -1106,6 +1114,7 @@ def format_array( justify: str = "right", decimal: str = ".", leading_space: Optional[bool] = None, + terminal: bool = False, ) -> List[str]: """ Format an array for printing. @@ -1169,6 +1178,7 @@ def format_array( justify=justify, decimal=decimal, leading_space=leading_space, + terminal=terminal, ) return fmt_obj.get_result() @@ -1188,6 +1198,7 @@ def __init__( quoting: Optional[int] = None, fixed_width: bool = True, leading_space: Optional[bool] = None, + terminal: bool = False, ): self.values = values self.digits = digits @@ -1200,6 +1211,7 @@ def __init__( self.quoting = quoting self.fixed_width = fixed_width self.leading_space = leading_space + self.terminal = terminal def get_result(self) -> List[str]: fmt_values = self._format_strings() @@ -1230,7 +1242,7 @@ def _format(x): if x is None: return "None" elif x is NA: - return "NA" + return formatter(x) elif x is NaT or np.isnat(x): return "NaT" except (TypeError, ValueError): @@ -1482,7 +1494,10 @@ def _format_strings(self) -> List[str]: if isinstance(values, (ABCIndexClass, ABCSeries)): values = values._values - formatter = values._formatter(boxed=True) + kwargs = _check_formatter_signature( + values._formatter, boxed=True, terminal=self.terminal + ) + formatter = values._formatter(**kwargs) if is_categorical_dtype(values.dtype): # Categorical is special for now, so that we can preserve tzinfo @@ -1499,6 +1514,7 @@ def _format_strings(self) -> List[str]: space=self.space, justify=self.justify, leading_space=self.leading_space, + terminal=self.terminal, ) return fmt_values @@ -1736,14 +1752,15 @@ def _make_fixed_width( minimum: Optional[int] = None, adj: Optional[TextAdjustment] = None, ) -> List[str]: - + # https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python if len(strings) == 0 or justify == "all": return strings + escaped = [strip_ansi(x) for x in strings] if adj is None: adj = _get_adjustment() - max_len = max(adj.len(x) for x in strings) + max_len = max(adj.len(x) for x in escaped) if minimum is not None: max_len = max(minimum, max_len) @@ -1752,13 +1769,13 @@ def _make_fixed_width( if conf_max is not None and max_len > conf_max: max_len = conf_max - def just(x): + def just(x, y): if conf_max is not None: - if (conf_max > 3) & (adj.len(x) > max_len): + if (conf_max > 3) & (adj.len(y) > max_len): x = x[: max_len - 3] + "..." return x - strings = [just(x) for x in strings] + strings = [just(x, y) for x, y in zip(strings, escaped)] result = adj.justify(strings, max_len, mode=justify) return result diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 4b5b5e9a0ce15..7788990994568 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -1,7 +1,7 @@ """ Printing tools. """ - +import math import sys from typing import ( Any, @@ -19,6 +19,8 @@ from pandas.core.dtypes.inference import is_sequence +from pandas.core._formats import strip_ansi + EscapeChars = Union[Mapping[str, str], Iterable[str]] @@ -57,16 +59,50 @@ def adjoin(space: int, *lists: List[str], **kwargs) -> str: return "\n".join(out_lines) +def ljust(x: str, max_len: int): + ansi_len = len(strip_ansi(x)) + padding = max_len - ansi_len + if padding > 0: + return f'{x}{" " * padding}' + return x + + +def rjust(x: str, max_len: int): + ansi_len = len(strip_ansi(x)) + padding = max_len - ansi_len + if padding > 0: + return f"{padding * ' '}" + x + return x + + +def center(x: str, max_len: int): + ansi_len = len(strip_ansi(x)) + padding = max_len - ansi_len + if padding > 0: + if padding % 2 and ansi_len % 2: + # uneven, extra on the right + rpadding = math.ceil(padding / 2) + lpadding = rpadding - 1 + elif padding % 2: + # uneven, extra on the left + lpadding = math.ceil(padding / 2) + rpadding = lpadding - 1 + else: + lpadding = rpadding = padding // 2 + return f"{lpadding * ' '}{x}{rpadding * ' '}" + return x + + def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> List[str]: """ Perform ljust, center, rjust against string or list-like """ if mode == "left": - return [x.ljust(max_len) for x in texts] + return [ljust(x, max_len) for x in texts] elif mode == "center": - return [x.center(max_len) for x in texts] + return [center(x, max_len) for x in texts] else: - return [x.rjust(max_len) for x in texts] + return [rjust(x, max_len) for x in texts] # Unicode consolidation diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index ec7e35e5c6db4..bce9b2f718f26 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -237,3 +237,15 @@ def test_arrow_roundtrip(): tm.assert_frame_equal(result, df) # ensure the missing value is represented by NA and not np.nan or None assert result.loc[2, "a"] is pd.NA + + +def test_na_str(): + a = pd.array(["a", "NA", None], dtype="string") + expected = "\n['a', 'NA', NA]\nLength: 3, dtype: string" + assert str(a) == expected + + +def test_na_repr(): + a = pd.array(["a", "NA", None], dtype="string") + expected = "\n['a', 'NA', \x1b[31mNA\x1b[0m]\nLength: 3, dtype: string" + assert repr(a) == expected diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 278b4d41262b7..51d41d5e1192b 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -1,4 +1,5 @@ import operator +import textwrap import numpy as np import pytest @@ -783,3 +784,65 @@ def test_arrow_roundtrip(): result = table.to_pandas() assert isinstance(result["a"].dtype, pd.BooleanDtype) tm.assert_frame_equal(result, df) + + +def test_na_str(): + a = pd.array([True, False, None], dtype="boolean") + expected = "\n[True, False, NA]\nLength: 3, dtype: boolean" + assert str(a) == expected + + # mult-line + a = pd.array([True, False, None] * 5, dtype="boolean") + expected = textwrap.dedent( + """\ + + [ True, False, NA, True, False, NA, True, False, NA, True, False, + NA, True, False, NA] + Length: 15, dtype: boolean""" + ) + assert str(a) == expected + + # multi-line, truncated + a = pd.array([True, False, None] * 100, dtype="boolean") + expected = textwrap.dedent( + """\ + + [ True, False, NA, True, False, NA, True, False, NA, True, + ... + NA, True, False, NA, True, False, NA, True, False, NA] + Length: 300, dtype: boolean""" + ) + assert str(a) == expected + + +def test_na_repr(): + a = pd.array([True, False, None], dtype="boolean") + expected = ( + "\n[True, False, \x1b[31mNA\x1b[0m]\nLength: 3, dtype: boolean" + ) + assert repr(a) == expected + + a = pd.array([True, False, None] * 5, dtype="boolean") + expected = textwrap.dedent( + """\ + + [ True, False, \x1b[31mNA\x1b[0m, True, False, \x1b[31mNA\x1b[0m, \ +True, False, \x1b[31mNA\x1b[0m, True, False, \x1b[31mNA\x1b[0m, True, + False, \x1b[31mNA\x1b[0m] + Length: 15, dtype: boolean""" + ) + assert repr(a) == expected + + # multi-line, truncated + a = pd.array([True, False, None] * 100, dtype="boolean") + expected = textwrap.dedent( + """\ + + [ True, False, \x1b[31mNA\x1b[0m, True, False, \x1b[31mNA\x1b[0m, \ +True, False, \x1b[31mNA\x1b[0m, True, + ... + \x1b[31mNA\x1b[0m, True, False, \x1b[31mNA\x1b[0m, True, False, \ +\x1b[31mNA\x1b[0m, True, False, \x1b[31mNA\x1b[0m] + Length: 300, dtype: boolean""" + ) + assert repr(a) == expected diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index f172280202e64..fbb2185d6aeda 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -88,19 +88,46 @@ def test_repr_dtype(dtype, expected): assert repr(dtype) == expected -def test_repr_array(): - result = repr(integer_array([1, None, 3])) - expected = "\n[1, NA, 3]\nLength: 3, dtype: Int64" - assert result == expected +def test_na_str(): + a = pd.array([1, 2, None], dtype="Int64") + expected = "\n[1, 2, NA]\nLength: 3, dtype: Int64" + assert str(a) == expected + + +def test_na_repr(): + a = pd.array([1, 2, None], dtype="Int64") + expected = "\n[1, 2, \x1b[31mNA\x1b[0m]\nLength: 3, dtype: Int64" + assert repr(a) == expected + + +def test_series_str(): + s = pd.Series([1, 2, None], dtype="Int64") + expected = "0 1\n1 2\n2 NA\ndtype: Int64" + assert str(s) == expected + + s = pd.Series([1, 2, None, 11111111111111], dtype="Int64") + expected = "0 1\n1 2\n2 NA\n3 11111111111111\ndtype: Int64" + assert str(s) == expected + + +def test_series_repr(): + s = pd.Series([1, 2, None], dtype="Int64") + expected = "0 1 \n1 2 \n2 \x1b[31mNA\x1b[0m \ndtype: Int64" + assert repr(s) == expected + + s = pd.Series([1, 2, None, 11111111111111], dtype="Int64") + expected = "0 1 \n1 2 \n2 \x1b[31mNA\x1b[0m \n3 11111111111111 \ndtype: Int64" + assert repr(s) == expected def test_repr_array_long(): data = integer_array([1, 2, None] * 1000) + NA = "\x1b[31mNA\x1b[0m" expected = ( "\n" - "[ 1, 2, NA, 1, 2, NA, 1, 2, NA, 1,\n" + f"[ 1, 2, {NA}, 1, 2, {NA}, 1, 2, {NA}, 1,\n" " ...\n" - " NA, 1, 2, NA, 1, 2, NA, 1, 2, NA]\n" + f" {NA}, 1, 2, {NA}, 1, 2, {NA}, 1, 2, {NA}]\n" "Length: 3000, dtype: Int64" ) result = repr(data) @@ -641,10 +668,9 @@ def test_coerce_to_ndarray_float_NA_rasies(self): def test_frame_repr(data_missing): - df = pd.DataFrame({"A": data_missing}) result = repr(df) - expected = " A\n0 NA\n1 1" + expected = " A \n0 \x1b[31mNA\x1b[0m \n1 1 " assert result == expected diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 570cdf5f29d00..4f564a2212b5e 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -164,7 +164,7 @@ def isna(self): def _na_value(self): return decimal.Decimal("NaN") - def _formatter(self, boxed=False): + def _formatter(self, boxed=False, terminal=False): if boxed: return "Decimal: {0}".format return repr diff --git a/pandas/tests/extension/test_compat.py b/pandas/tests/extension/test_compat.py new file mode 100644 index 0000000000000..ed00e355d8135 --- /dev/null +++ b/pandas/tests/extension/test_compat.py @@ -0,0 +1,22 @@ +""" +Tests for downstream compatibility. +""" +import pandas as pd +import pandas._testing as tm + + +class SingleFormatter(pd.arrays.StringArray): + """A StringArray with the old single-argument of _formatter""" + + def _formatter(self, boxed: bool = False): + breakpoint() + return super()._formatter(boxed) + + +def test_single_argument_formatter(): + s = SingleFormatter._from_sequence(["a", "b", "c"]) + with tm.assert_produces_warning(DeprecationWarning): + str(s) + repr(s) + str(pd.Series(s)) + repr(pd.Series(s)) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index f0d5ef19c4468..f6bf4cbb2f6b0 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -203,3 +203,19 @@ def test_enable_data_resource_formatter(self): assert formatters[mimetype].enabled # smoke test that it works self.display_formatter.format(cf) + + +@pytest.mark.parametrize("max_len", [0, 1, 4, 5, 6, 10, 100]) +@pytest.mark.parametrize("method", ["ljust", "rjust", "center"]) +@pytest.mark.parametrize("value", ["N", "NA"]) +def test_justify(value, method, max_len): + ansi_value = f"\x1b[31m{value}\x1b[0m" + result = getattr(printing, method)(ansi_value, max_len) + expected = getattr(value, method)(max_len) + n_non_printed = len(ansi_value) - len(value) + + assert (len(result) - n_non_printed) == len(expected) + + result = getattr(printing, method)(value, max_len) + expected = getattr(value, method)(max_len) + assert result == expected