diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 854f41d6b4dc3..82d9ef87dd25c 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -7,7 +7,6 @@ from contextlib import contextmanager import copy from functools import partial -from itertools import product from typing import ( Any, Callable, @@ -36,14 +35,10 @@ from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import doc -from pandas.core.dtypes.common import is_float from pandas.core.dtypes.generic import ABCSeries import pandas as pd -from pandas.api.types import ( - is_dict_like, - is_list_like, -) +from pandas.api.types import is_list_like from pandas.core import generic import pandas.core.common as com from pandas.core.frame import DataFrame @@ -222,13 +217,7 @@ def _init_tooltips(self): self.tooltips = _Tooltips() def _default_display_func(self, x): - if self.na_rep is not None and pd.isna(x): - return self.na_rep - elif is_float(x): - display_format = f"{x:.{self.precision}f}" - return display_format - else: - return x + return self._maybe_wrap_formatter(formatter=None)(x) def set_tooltips(self, ttips: DataFrame) -> Styler: """ @@ -575,20 +564,29 @@ def _translate(self): return d - def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> Styler: + def format( + self, + formatter: Optional[ + Union[Dict[Any, Optional[Union[str, Callable]]], str, Callable] + ] = None, + subset=None, + na_rep: Optional[str] = None, + ) -> Styler: """ Format the text display value of cells. Parameters ---------- formatter : str, callable, dict or None - If ``formatter`` is None, the default formatter is used. + Format specification to use for displaying values. If ``None``, the default + formatter is used. If ``dict``, keys should correspond to column names, + and values should be string or callable. subset : IndexSlice An argument to ``DataFrame.loc`` that restricts which elements ``formatter`` is applied to. 
na_rep : str, optional - Representation for missing values. - If ``na_rep`` is None, no special formatting is applied. + Representation for missing values. If ``None``, will revert to using + ``Styler.na_rep``. .. versionadded:: 1.0.0 @@ -596,54 +594,69 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> Styler ------- self : Styler + See Also + -------- + Styler.set_na_rep : Set the missing data representation on a Styler. + Styler.set_precision : Set the precision used to display values. + Notes ----- - ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where - ``a`` is one of + This method assigns a formatting function to each cell in the DataFrame. Where + arguments are given as string this is wrapped to a callable as ``str.format(x)``. - - str: this will be wrapped in: ``a.format(x)`` - - callable: called with the value of an individual cell + If the ``subset`` argument is given as well as the ``formatter`` argument in + dict form then the intersection of the ``subset`` and the columns as keys + of the dict are used to define the formatting region. Keys in the dict that + do not exist in the ``subset`` will raise a ``KeyError``. - The default display value for numeric values is the "general" (``g``) - format with ``pd.options.display.precision`` precision. + The default formatter currently expresses floats and complex numbers with the + precision defined by ``Styler.precision``, leaving all other types unformatted, + and replacing missing values with the string defined in ``Styler.na_rep``, if + set. Examples -------- - >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) - >>> df.style.format("{:.2%}") - >>> df['c'] = ['a', 'b', 'c', 'd'] - >>> df.style.format({'c': str.upper}) + >>> df = pd.DataFrame([[1.0, 2.0],[3.0, 4.0]], columns=['a', 'b']) + >>> df.style.format({'a': '{:.0f}'}) + a b + 0 1 2.000000 + 1 3 4.000000 + + >>> df = pd.DataFrame(np.nan, + ... columns=['a', 'b', 'c', 'd'], + ... 
index=['x', 'y', 'z']) + >>> df.iloc[0, :] = 1.9 + >>> df.style.set_precision(3) + ... .format({'b': '{:.0f}', 'c': '{:.1f}'.format}, + ... na_rep='HARD', + ... subset=pd.IndexSlice[['y','x'], ['a', 'b', 'c']]) + ... .set_na_rep('SOFT') + a b c d + x 1.900 2 1.9 1.900 + y SOFT HARD HARD SOFT + z SOFT SOFT SOFT SOFT """ - if formatter is None: - assert self._display_funcs.default_factory is not None - formatter = self._display_funcs.default_factory() + subset = slice(None) if subset is None else subset + subset = _non_reducing_slice(subset) + data = self.data.loc[subset] - if subset is None: - row_locs = range(len(self.data)) - col_locs = range(len(self.data.columns)) + if not isinstance(formatter, dict): + columns = data.columns + formatter = {col: formatter for col in columns} else: - subset = _non_reducing_slice(subset) - if len(subset) == 1: - subset = subset, self.data.columns - - sub_df = self.data.loc[subset] - row_locs = self.data.index.get_indexer_for(sub_df.index) - col_locs = self.data.columns.get_indexer_for(sub_df.columns) - - if is_dict_like(formatter): - for col, col_formatter in formatter.items(): - # formatter must be callable, so '{}' are converted to lambdas - col_formatter = _maybe_wrap_formatter(col_formatter, na_rep) - col_num = self.data.columns.get_indexer_for([col])[0] - - for row_num in row_locs: - self._display_funcs[(row_num, col_num)] = col_formatter - else: - # single scalar to format all cells with - formatter = _maybe_wrap_formatter(formatter, na_rep) - locs = product(*(row_locs, col_locs)) - for i, j in locs: - self._display_funcs[(i, j)] = formatter + columns = formatter.keys() + + for col in columns: + try: + format_func = formatter[col] + except KeyError: + format_func = None + format_func = self._maybe_wrap_formatter(format_func, na_rep=na_rep) + + for row, value in data[[col]].itertuples(): + i, j = self.index.get_loc(row), self.columns.get_loc(col) + self._display_funcs[(i, j)] = format_func + return self def 
set_td_classes(self, classes: DataFrame) -> Styler: @@ -1031,7 +1044,7 @@ def where( def set_precision(self, precision: int) -> Styler: """ - Set the precision used to render. + Set the precision used to display values. Parameters ---------- @@ -1294,6 +1307,40 @@ def hide_columns(self, subset) -> Styler: self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns) return self + def _default_formatter(self, x): + if isinstance(x, (float, complex)): + return f"{x:.{self.precision}f}" + return x + + def _maybe_wrap_formatter( + self, + formatter: Optional[Union[Callable, str]] = None, + na_rep: Optional[str] = None, + ) -> Callable: + """ + Allows formatters to be expressed as str, callable or None, where None returns + a default formatting function. Wraps with na_rep where it is available. + """ + if isinstance(formatter, str): + func = lambda x: formatter.format(x) + elif callable(formatter): + func = formatter + elif formatter is None: + func = self._default_formatter + else: + raise TypeError( + f"'formatter' expected str or callable, got {type(formatter)}" + ) + + if na_rep is not None: + return lambda x: na_rep if pd.isna(x) else func(x) + else: + return ( + lambda x: self.na_rep + if all((self.na_rep is not None, pd.isna(x))) + else func(x) + ) + # ----------------------------------------------------------------------- # A collection of "builtin" styles # ----------------------------------------------------------------------- @@ -2035,26 +2082,6 @@ def _get_level_lengths(index, hidden_elements=None): return non_zero_lengths -def _maybe_wrap_formatter( - formatter: Union[Callable, str], na_rep: Optional[str] -) -> Callable: - if isinstance(formatter, str): - formatter_func = lambda x: formatter.format(x) - elif callable(formatter): - formatter_func = formatter - else: - msg = f"Expected a template string or callable, got {formatter} instead" - raise TypeError(msg) - - if na_rep is None: - return formatter_func - elif isinstance(na_rep, str): - return 
lambda x: na_rep if pd.isna(x) else formatter_func(x) - else: - msg = f"Expected a string, got {na_rep} instead" - raise TypeError(msg) - - def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: """ Convert css-string to sequence of tuples format if needed. diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 01ed234f6e248..f534ecddb9cfc 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -565,12 +565,40 @@ def test_format_non_numeric_na(self): assert ctx["body"][1][1]["display_value"] == "-" assert ctx["body"][1][2]["display_value"] == "-" - def test_format_with_bad_na_rep(self): - # GH 21527 28358 - df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) - msg = "Expected a string, got -1 instead" - with pytest.raises(TypeError, match=msg): - df.style.format(None, na_rep=-1) + def test_display_format_subset_interaction(self): + # GH40032 + # test subset and formatter interaction in conjunction with other methods + df = DataFrame([[np.nan, 1], [2, np.nan]], columns=["a", "b"], index=["x", "y"]) + + ctx = df.style.format({"a": "{:.1f}"}).set_na_rep("X")._translate() + assert ctx["body"][0][1]["display_value"] == "X" + assert ctx["body"][1][2]["display_value"] == "X" + ctx = df.style.format({"a": "{:.1f}"}, na_rep="Y").set_na_rep("X")._translate() + assert ctx["body"][0][1]["display_value"] == "Y" + assert ctx["body"][1][2]["display_value"] == "X" + ctx = ( + df.style.format("{:.1f}", na_rep="Y", subset=["a"]) + .set_na_rep("X") + ._translate() + ) + assert ctx["body"][0][1]["display_value"] == "Y" + assert ctx["body"][1][2]["display_value"] == "X" + + ctx = df.style.format({"a": "{:.1f}"}).set_precision(2)._translate() + assert ctx["body"][0][2]["display_value"] == "1.00" + assert ctx["body"][1][1]["display_value"] == "2.0" + ctx = df.style.format("{:.1f}").set_precision(2)._translate() + assert ctx["body"][0][2]["display_value"] == 
"1.0" + assert ctx["body"][1][1]["display_value"] == "2.0" + ctx = df.style.format("{:.1f}", subset=["a"]).set_precision(2)._translate() + assert ctx["body"][0][2]["display_value"] == "1.00" + assert ctx["body"][1][1]["display_value"] == "2.0" + ctx = df.style.format(None, subset=["a"]).set_precision(2)._translate() + assert ctx["body"][0][2]["display_value"] == "1.00" + assert ctx["body"][1][1]["display_value"] == "2.00" + + with pytest.raises(KeyError, match="are in the [columns]"): + df.style.format({"a": "{:.0f}"}, subset=["b"]) def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) @@ -697,15 +725,10 @@ def test_display_format(self): ) assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 - def test_display_format_raises(self): - df = DataFrame(np.random.randn(2, 2)) - msg = "Expected a template string or callable, got 5 instead" - with pytest.raises(TypeError, match=msg): - df.style.format(5) - - msg = "Expected a template string or callable, got True instead" - with pytest.raises(TypeError, match=msg): - df.style.format(True) + @pytest.mark.parametrize("formatter", [5, True, [2.0]]) + def test_display_format_raises(self, formatter): + with pytest.raises(TypeError, match="expected str or callable"): + self.df.style.format(formatter) def test_display_set_precision(self): # Issue #13257 @@ -734,7 +757,7 @@ def test_display_set_precision(self): assert ctx["body"][1][1]["display_value"] == "3.212" assert ctx["body"][1][2]["display_value"] == "4.566" - def test_display_subset(self): + def test_format_subset(self): df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) ctx = df.style.format( {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :]