Skip to content

BUG/ENH: Styler.format() always inherits from .set_na_rep() #40060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
179 changes: 103 additions & 76 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from contextlib import contextmanager
import copy
from functools import partial
from itertools import product
from typing import (
Any,
Callable,
Expand Down Expand Up @@ -36,14 +35,10 @@
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

from pandas.core.dtypes.common import is_float
from pandas.core.dtypes.generic import ABCSeries

import pandas as pd
from pandas.api.types import (
is_dict_like,
is_list_like,
)
from pandas.api.types import is_list_like
from pandas.core import generic
import pandas.core.common as com
from pandas.core.frame import DataFrame
Expand Down Expand Up @@ -222,13 +217,7 @@ def _init_tooltips(self):
self.tooltips = _Tooltips()

def _default_display_func(self, x):
if self.na_rep is not None and pd.isna(x):
return self.na_rep
elif is_float(x):
display_format = f"{x:.{self.precision}f}"
return display_format
else:
return x
return self._maybe_wrap_formatter(formatter=None)(x)

def set_tooltips(self, ttips: DataFrame) -> Styler:
"""
Expand Down Expand Up @@ -575,75 +564,99 @@ def _translate(self):

return d

def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> Styler:
def format(
self,
formatter: Optional[
Union[Dict[Any, Optional[Union[str, Callable]]], str, Callable]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you type this at the top / in conftest.py (note that we can probably generally type these formatters across IO functions, but that's for another day)

] = None,
subset=None,
na_rep: Optional[str] = None,
) -> Styler:
"""
Format the text display value of cells.

Parameters
----------
formatter : str, callable, dict or None
If ``formatter`` is None, the default formatter is used.
Format specification to use for displaying values. If ``None``, the default
formatter is used. If ``dict``, keys should correspond to column names,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

correspond

and values should be string or callable.
subset : IndexSlice
An argument to ``DataFrame.loc`` that restricts which elements
``formatter`` is applied to.
na_rep : str, optional
Representation for missing values.
If ``na_rep`` is None, no special formatting is applied.
Representation for missing values. If ``None``, will revert to using
``Styler.na_rep``
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm why are you adding this special function? we don't do this anywhere else

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think your comment, that nowhere else is there a `set_na_rep` method, lends itself to the answer here. It is possible for there to be a solution where both functions (that currently exist) operate consistently, as opposed to somewhat inconsistently and inconveniently as at present.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add that this might not even be interpreted as a change. "No special formatting is applied" might be expected to mean that `Styler.na_rep` is used as the default; it is ambiguous. I was just clarifying what the bug fix did here.


.. versionadded:: 1.0.0

Returns
-------
self : Styler

See Also
--------
Styler.set_na_rep : Set the missing data representation on a Styler.
Styler.set_precision : Set the precision used to display values.

Notes
-----
``formatter`` is either an ``a`` or a dict ``{column name: a}`` where
``a`` is one of
This method assigns a formatting function to each cell in the DataFrame. Where
arguments are given as string this is wrapped to a callable as ``str.format(x)``

- str: this will be wrapped in: ``a.format(x)``
- callable: called with the value of an individual cell
If the ``subset`` argument is given as well as the ``formatter`` argument in
dict form then the intersection of the ``subset`` and the columns as keys
of the dict are used to define the formatting region. Keys in the dict that
do not exist in the ``subset`` will raise a ``KeyError``.

The default display value for numeric values is the "general" (``g``)
format with ``pd.options.display.precision`` precision.
The default formatter currently expresses floats and complex numbers with the
precision defined by ``Styler.precision``, leaving all other types unformatted,
and replacing missing values with the string defined in ``Styler.na_rep``, if
set.

Examples
--------
>>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])
>>> df.style.format("{:.2%}")
>>> df['c'] = ['a', 'b', 'c', 'd']
>>> df.style.format({'c': str.upper})
>>> df = pd.DataFrame([[1.0, 2.0],[3.0, 4.0]], columns=['a', 'b'])
>>> df.style.format({'a': '{:.0f}'})
a b
0 1 2.000000
1 3 4.000000

>>> df = pd.DataFrame(np.nan,
... columns=['a', 'b', 'c', 'd'],
... index=['x', 'y', 'z'])
>>> df.iloc[0, :] = 1.9
>>> df.style.set_precision(3)
... .format({'b': '{:.0f}', 'c': '{:.1f}'.format},
... na_rep='HARD',
... subset=pd.IndexSlice[['y','x'], ['a', 'b', 'c']])
... .set_na_rep('SOFT')
a b c d
x 1.900 2 1.9 1.900
y SOFT HARD HARD SOFT
z SOFT SOFT SOFT SOFT
"""
if formatter is None:
assert self._display_funcs.default_factory is not None
formatter = self._display_funcs.default_factory()
subset = slice(None) if subset is None else subset
subset = _non_reducing_slice(subset)
data = self.data.loc[subset]

if subset is None:
row_locs = range(len(self.data))
col_locs = range(len(self.data.columns))
if not isinstance(formatter, dict):
columns = data.columns
formatter = {col: formatter for col in columns}
else:
subset = _non_reducing_slice(subset)
if len(subset) == 1:
subset = subset, self.data.columns

sub_df = self.data.loc[subset]
row_locs = self.data.index.get_indexer_for(sub_df.index)
col_locs = self.data.columns.get_indexer_for(sub_df.columns)

if is_dict_like(formatter):
for col, col_formatter in formatter.items():
# formatter must be callable, so '{}' are converted to lambdas
col_formatter = _maybe_wrap_formatter(col_formatter, na_rep)
col_num = self.data.columns.get_indexer_for([col])[0]

for row_num in row_locs:
self._display_funcs[(row_num, col_num)] = col_formatter
else:
# single scalar to format all cells with
formatter = _maybe_wrap_formatter(formatter, na_rep)
locs = product(*(row_locs, col_locs))
for i, j in locs:
self._display_funcs[(i, j)] = formatter
columns = formatter.keys()

for col in columns:
try:
format_func = formatter[col]
except KeyError:
format_func = None
format_func = self._maybe_wrap_formatter(format_func, na_rep=na_rep)

for row, value in data[[col]].itertuples():
i, j = self.index.get_loc(row), self.columns.get_loc(col)
self._display_funcs[(i, j)] = format_func

return self

def set_td_classes(self, classes: DataFrame) -> Styler:
Expand Down Expand Up @@ -1031,7 +1044,7 @@ def where(

def set_precision(self, precision: int) -> Styler:
"""
Set the precision used to render.
Set the precision used to display values.

Parameters
----------
Expand Down Expand Up @@ -1294,6 +1307,40 @@ def hide_columns(self, subset) -> Styler:
self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns)
return self

def _default_formatter(self, x):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this not a module level function?

if isinstance(x, (float, complex)):
return f"{x:.{self.precision}f}"
return x

def _maybe_wrap_formatter(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this moved? what is the point of self.na_rep?

Copy link
Contributor Author

@attack68 attack68 Feb 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's moved because, as a module-level function, it can't act dynamically and reflect `self.na_rep` if it is later changed. At class level it can, so this goes to inheritance.

self,
formatter: Optional[Union[Callable, str]] = None,
na_rep: Optional[str] = None,
) -> Callable:
"""
Allows formatters to be expressed as str, callable or None, where None returns
a default formatting function. Wraps with na_rep where it is available.
"""
if isinstance(formatter, str):
func = lambda x: formatter.format(x)
elif callable(formatter):
func = formatter
elif formatter is None:
func = self._default_formatter
else:
raise TypeError(
f"'formatter' expected str or callable, got {type(formatter)}"
)

if na_rep is not None:
return lambda x: na_rep if pd.isna(x) else func(x)
else:
return (
lambda x: self.na_rep
if all((self.na_rep is not None, pd.isna(x)))
else func(x)
)

# -----------------------------------------------------------------------
# A collection of "builtin" styles
# -----------------------------------------------------------------------
Expand Down Expand Up @@ -2035,26 +2082,6 @@ def _get_level_lengths(index, hidden_elements=None):
return non_zero_lengths


def _maybe_wrap_formatter(
formatter: Union[Callable, str], na_rep: Optional[str]
) -> Callable:
if isinstance(formatter, str):
formatter_func = lambda x: formatter.format(x)
elif callable(formatter):
formatter_func = formatter
else:
msg = f"Expected a template string or callable, got {formatter} instead"
raise TypeError(msg)

if na_rep is None:
return formatter_func
elif isinstance(na_rep, str):
return lambda x: na_rep if pd.isna(x) else formatter_func(x)
else:
msg = f"Expected a string, got {na_rep} instead"
raise TypeError(msg)


def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:
"""
Convert css-string to sequence of tuples format if needed.
Expand Down
55 changes: 39 additions & 16 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,12 +565,40 @@ def test_format_non_numeric_na(self):
assert ctx["body"][1][1]["display_value"] == "-"
assert ctx["body"][1][2]["display_value"] == "-"

def test_format_with_bad_na_rep(self):
# GH 21527 28358
df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
msg = "Expected a string, got -1 instead"
with pytest.raises(TypeError, match=msg):
df.style.format(None, na_rep=-1)
def test_display_format_subset_interaction(self):
    """
    Check ``Styler.format`` together with ``subset``, ``set_na_rep`` and
    ``set_precision`` when the setters are chained after ``format``.
    """
    # GH40032
    # test subset and formatter interaction in conjunction with other methods
    df = DataFrame([[np.nan, 1], [2, np.nan]], columns=["a", "b"], index=["x", "y"])

    # The two NaN cells are (x, a) -> body[0][1] and (y, b) -> body[1][2].
    # Without an explicit na_rep in format(), both show the later set_na_rep.
    ctx = df.style.format({"a": "{:.1f}"}).set_na_rep("X")._translate()
    assert ctx["body"][0][1]["display_value"] == "X"
    assert ctx["body"][1][2]["display_value"] == "X"

    # An explicit na_rep applies only to the formatted column "a"; column "b"
    # still shows the value given to set_na_rep.
    ctx = df.style.format({"a": "{:.1f}"}, na_rep="Y").set_na_rep("X")._translate()
    assert ctx["body"][0][1]["display_value"] == "Y"
    assert ctx["body"][1][2]["display_value"] == "X"

    # Same expectation when the column is selected through ``subset``.
    ctx = (
        df.style.format("{:.1f}", na_rep="Y", subset=["a"])
        .set_na_rep("X")
        ._translate()
    )
    assert ctx["body"][0][1]["display_value"] == "Y"
    assert ctx["body"][1][2]["display_value"] == "X"

    # Explicitly formatted cells keep their template; the remaining cells
    # show the precision set afterwards.
    ctx = df.style.format({"a": "{:.1f}"}).set_precision(2)._translate()
    assert ctx["body"][0][2]["display_value"] == "1.00"
    assert ctx["body"][1][1]["display_value"] == "2.0"

    ctx = df.style.format("{:.1f}").set_precision(2)._translate()
    assert ctx["body"][0][2]["display_value"] == "1.0"
    assert ctx["body"][1][1]["display_value"] == "2.0"

    ctx = df.style.format("{:.1f}", subset=["a"]).set_precision(2)._translate()
    assert ctx["body"][0][2]["display_value"] == "1.00"
    assert ctx["body"][1][1]["display_value"] == "2.0"

    # formatter=None means the default formatter, so both cells show the
    # precision set afterwards.
    ctx = df.style.format(None, subset=["a"]).set_precision(2)._translate()
    assert ctx["body"][0][2]["display_value"] == "1.00"
    assert ctx["body"][1][1]["display_value"] == "2.00"

    # ``match`` is a regular expression searched in the error text. The
    # brackets are escaped so that "[columns]" is matched literally rather
    # than being read as a character class.
    with pytest.raises(KeyError, match=r"are in the \[columns\]"):
        df.style.format({"a": "{:.0f}"}, subset=["b"])

def test_nonunique_raises(self):
df = DataFrame([[1, 2]], columns=["A", "A"])
Expand Down Expand Up @@ -697,15 +725,10 @@ def test_display_format(self):
)
assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3

def test_display_format_raises(self):
df = DataFrame(np.random.randn(2, 2))
msg = "Expected a template string or callable, got 5 instead"
with pytest.raises(TypeError, match=msg):
df.style.format(5)

msg = "Expected a template string or callable, got True instead"
with pytest.raises(TypeError, match=msg):
df.style.format(True)
@pytest.mark.parametrize("formatter", [5, True, [2.0]])
def test_display_format_raises(self, formatter):
with pytest.raises(TypeError, match="expected str or callable"):
self.df.style.format(formatter)

def test_display_set_precision(self):
# Issue #13257
Expand Down Expand Up @@ -734,7 +757,7 @@ def test_display_set_precision(self):
assert ctx["body"][1][1]["display_value"] == "3.212"
assert ctx["body"][1][2]["display_value"] == "4.566"

def test_display_subset(self):
def test_format_subset(self):
df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
ctx = df.style.format(
{"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :]
Expand Down