pandas-dev · attack68 · Feb 25, 2021 · Feb 25, 2021 · Feb 25, 2021 · Feb 25, 2021
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -7,7 +7,6 @@
 from contextlib import contextmanager
 import copy
 from functools import partial
-from itertools import product
 from typing import (
     Any,
     Callable,
@@ -36,14 +35,10 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import doc
 
-from pandas.core.dtypes.common import is_float
 from pandas.core.dtypes.generic import ABCSeries
 
 import pandas as pd
-from pandas.api.types import (
-    is_dict_like,
-    is_list_like,
-)
+from pandas.api.types import is_list_like
 from pandas.core import generic
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
@@ -222,13 +217,7 @@ def _init_tooltips(self):
             self.tooltips = _Tooltips()
 
     def _default_display_func(self, x):
-        if self.na_rep is not None and pd.isna(x):
-            return self.na_rep
-        elif is_float(x):
-            display_format = f"{x:.{self.precision}f}"
-            return display_format
-        else:
-            return x
+        return self._maybe_wrap_formatter(formatter=None)(x)  # default format, na_rep
 
     def set_tooltips(self, ttips: DataFrame) -> Styler:
         """
@@ -575,75 +564,99 @@ def _translate(self):
 
         return d
 
-    def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> Styler:
+    def format(
+        self,
+        formatter: Optional[
+            Union[Dict[Any, Optional[Union[str, Callable]]], str, Callable]
+        ] = None,
+        subset=None,
+        na_rep: Optional[str] = None,
+    ) -> Styler:
         """
         Format the text display value of cells.
 
         Parameters
         ----------
         formatter : str, callable, dict or None
-            If ``formatter`` is None, the default formatter is used.
+            Format specification to use for displaying values. If ``None``, the default
+            formatter is used. If ``dict``, keys should correspond to column names,
+            and values should be string or callable.
         subset : IndexSlice
             An argument to ``DataFrame.loc`` that restricts which elements
             ``formatter`` is applied to.
         na_rep : str, optional
-            Representation for missing values.
-            If ``na_rep`` is None, no special formatting is applied.
+            Representation for missing values. If ``None``, will revert to using
+            ``Styler.na_rep``.
 
             .. versionadded:: 1.0.0
 
         Returns
         -------
         self : Styler
 
+        See Also
+        --------
+        Styler.set_na_rep : Set the missing data representation on a Styler.
+        Styler.set_precision : Set the precision used to display values.
+
         Notes
         -----
-        ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where
-        ``a`` is one of
+        This method assigns a formatting function to each cell in the DataFrame. A
+        ``formatter`` given as a string is wrapped into a callable as ``str.format(x)``.
 
-        - str: this will be wrapped in: ``a.format(x)``
-        - callable: called with the value of an individual cell
+        If the ``subset`` argument is given as well as the ``formatter`` argument in
+        dict form then the intersection of the ``subset`` and the columns as keys
+        of the dict are used to define the formatting region. Keys in the dict that
+        do not exist in the ``subset`` will raise a ``KeyError``.
 
-        The default display value for numeric values is the "general" (``g``)
-        format with ``pd.options.display.precision`` precision.
+        The default formatter currently expresses floats and complex numbers with the
+        precision defined by ``Styler.precision``, leaving all other types unformatted,
+        and replacing missing values with the string defined in ``Styler.na_rep``, if
+        set.
 
         Examples
         --------
-        >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])
-        >>> df.style.format("{:.2%}")
-        >>> df['c'] = ['a', 'b', 'c', 'd']
-        >>> df.style.format({'c': str.upper})
+        >>> df = pd.DataFrame([[1.0, 2.0],[3.0, 4.0]], columns=['a', 'b'])
+        >>> df.style.format({'a': '{:.0f}'})
+            a          b
+        0   1   2.000000
+        1   3   4.000000
+
+        >>> df = pd.DataFrame(np.nan,
+        ...                   columns=['a', 'b', 'c', 'd'],
+        ...                   index=['x', 'y', 'z'])
+        >>> df.iloc[0, :] = 1.9
+        >>> (df.style.set_precision(3)
+        ...    .format({'b': '{:.0f}', 'c': '{:.1f}'.format},
+        ...            na_rep='HARD',
+        ...            subset=pd.IndexSlice[['y','x'], ['a', 'b', 'c']])
+        ...    .set_na_rep('SOFT'))
+               a     b     c       d
+        x  1.900     2   1.9   1.900
+        y   SOFT  HARD  HARD    SOFT
+        z   SOFT  SOFT  SOFT    SOFT
         """
-        if formatter is None:
-            assert self._display_funcs.default_factory is not None
-            formatter = self._display_funcs.default_factory()
+        subset = slice(None) if subset is None else subset
+        subset = _non_reducing_slice(subset)
+        data = self.data.loc[subset]
 
-        if subset is None:
-            row_locs = range(len(self.data))
-            col_locs = range(len(self.data.columns))
+        if not isinstance(formatter, dict):
+            columns = data.columns
+            formatter = {col: formatter for col in columns}
         else:
-            subset = _non_reducing_slice(subset)
-            if len(subset) == 1:
-                subset = subset, self.data.columns
-
-            sub_df = self.data.loc[subset]
-            row_locs = self.data.index.get_indexer_for(sub_df.index)
-            col_locs = self.data.columns.get_indexer_for(sub_df.columns)
-
-        if is_dict_like(formatter):
-            for col, col_formatter in formatter.items():
-                # formatter must be callable, so '{}' are converted to lambdas
-                col_formatter = _maybe_wrap_formatter(col_formatter, na_rep)
-                col_num = self.data.columns.get_indexer_for([col])[0]
-
-                for row_num in row_locs:
-                    self._display_funcs[(row_num, col_num)] = col_formatter
-        else:
-            # single scalar to format all cells with
-            formatter = _maybe_wrap_formatter(formatter, na_rep)
-            locs = product(*(row_locs, col_locs))
-            for i, j in locs:
-                self._display_funcs[(i, j)] = formatter
+            columns = formatter.keys()
+
+        for col in columns:
+            # ``col`` is always a key of ``formatter`` here, so no fallback is needed
+            format_func = self._maybe_wrap_formatter(formatter[col], na_rep=na_rep)
+
+            # selecting ``[col]`` raises KeyError for dict keys outside ``subset``
+            rows = data[[col]].index
+            j = self.columns.get_loc(col)  # loop-invariant: resolve once per column
+            for row in rows:
+                i = self.index.get_loc(row)
+                self._display_funcs[(i, j)] = format_func
+
         return self
 
     def set_td_classes(self, classes: DataFrame) -> Styler:
@@ -1031,7 +1044,7 @@ def where(
 
     def set_precision(self, precision: int) -> Styler:
         """
-        Set the precision used to render.
+        Set the precision used to display values.
 
         Parameters
         ----------
@@ -1294,6 +1307,40 @@ def hide_columns(self, subset) -> Styler:
         self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns)
         return self
 
+    def _default_formatter(self, x):
+        # Render floats/complex with ``self.precision`` decimals; pass others through.
+        # NOTE(review): np.float32 does not subclass ``float`` -- confirm coverage.
+        return f"{x:.{self.precision}f}" if isinstance(x, (float, complex)) else x
+
+    def _maybe_wrap_formatter(
+        self,
+        formatter: Optional[Union[Callable, str]] = None,
+        na_rep: Optional[str] = None,
+    ) -> Callable:
+        """
+        Wrap ``formatter`` (str, callable or None) into a display callable.
+
+        A str is applied via ``str.format``; None selects ``self._default_formatter``.
+        Missing values display as ``na_rep`` if given, else ``self.na_rep`` if set.
+        Any other ``formatter`` type raises ``TypeError``.
+        """
+        if isinstance(formatter, str):
+            func = lambda x: formatter.format(x)
+        elif callable(formatter):
+            func = formatter
+        elif formatter is None:
+            func = self._default_formatter
+        else:
+            raise TypeError(
+                f"'formatter' expected str or callable, got {type(formatter)}"
+            )
+
+        if na_rep is not None:
+            return lambda x: na_rep if pd.isna(x) else func(x)
+        return lambda x: (
+            self.na_rep if self.na_rep is not None and pd.isna(x) else func(x)
+        )
+
     # -----------------------------------------------------------------------
     # A collection of "builtin" styles
     # -----------------------------------------------------------------------
@@ -2035,26 +2082,6 @@ def _get_level_lengths(index, hidden_elements=None):
     return non_zero_lengths
 
 
-def _maybe_wrap_formatter(
-    formatter: Union[Callable, str], na_rep: Optional[str]
-) -> Callable:
-    if isinstance(formatter, str):
-        formatter_func = lambda x: formatter.format(x)
-    elif callable(formatter):
-        formatter_func = formatter
-    else:
-        msg = f"Expected a template string or callable, got {formatter} instead"
-        raise TypeError(msg)
-
-    if na_rep is None:
-        return formatter_func
-    elif isinstance(na_rep, str):
-        return lambda x: na_rep if pd.isna(x) else formatter_func(x)
-    else:
-        msg = f"Expected a string, got {na_rep} instead"
-        raise TypeError(msg)
-
-
 def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:
     """
     Convert css-string to sequence of tuples format if needed.

diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py
@@ -565,12 +565,40 @@ def test_format_non_numeric_na(self):
         assert ctx["body"][1][1]["display_value"] == "-"
         assert ctx["body"][1][2]["display_value"] == "-"
 
-    def test_format_with_bad_na_rep(self):
-        # GH 21527 28358
-        df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
-        msg = "Expected a string, got -1 instead"
-        with pytest.raises(TypeError, match=msg):
-            df.style.format(None, na_rep=-1)
+    def test_display_format_subset_interaction(self):
+        # GH40032
+        # test subset and formatter interaction in conjunction with other methods
+        df = DataFrame([[np.nan, 1], [2, np.nan]], columns=["a", "b"], index=["x", "y"])
+
+        ctx = df.style.format({"a": "{:.1f}"}).set_na_rep("X")._translate()
+        assert ctx["body"][0][1]["display_value"] == "X"
+        assert ctx["body"][1][2]["display_value"] == "X"
+        ctx = df.style.format({"a": "{:.1f}"}, na_rep="Y").set_na_rep("X")._translate()
+        assert ctx["body"][0][1]["display_value"] == "Y"
+        assert ctx["body"][1][2]["display_value"] == "X"
+        ctx = (
+            df.style.format("{:.1f}", na_rep="Y", subset=["a"])
+            .set_na_rep("X")
+            ._translate()
+        )
+        assert ctx["body"][0][1]["display_value"] == "Y"
+        assert ctx["body"][1][2]["display_value"] == "X"
+
+        ctx = df.style.format({"a": "{:.1f}"}).set_precision(2)._translate()
+        assert ctx["body"][0][2]["display_value"] == "1.00"
+        assert ctx["body"][1][1]["display_value"] == "2.0"
+        ctx = df.style.format("{:.1f}").set_precision(2)._translate()
+        assert ctx["body"][0][2]["display_value"] == "1.0"
+        assert ctx["body"][1][1]["display_value"] == "2.0"
+        ctx = df.style.format("{:.1f}", subset=["a"]).set_precision(2)._translate()
+        assert ctx["body"][0][2]["display_value"] == "1.00"
+        assert ctx["body"][1][1]["display_value"] == "2.0"
+        ctx = df.style.format(None, subset=["a"]).set_precision(2)._translate()
+        assert ctx["body"][0][2]["display_value"] == "1.00"
+        assert ctx["body"][1][1]["display_value"] == "2.00"
+
+        with pytest.raises(KeyError, match=r"are in the \[columns\]"):
+            df.style.format({"a": "{:.0f}"}, subset=["b"])
 
     def test_nonunique_raises(self):
         df = DataFrame([[1, 2]], columns=["A", "A"])
@@ -697,15 +725,10 @@ def test_display_format(self):
         )
         assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3
 
-    def test_display_format_raises(self):
-        df = DataFrame(np.random.randn(2, 2))
-        msg = "Expected a template string or callable, got 5 instead"
-        with pytest.raises(TypeError, match=msg):
-            df.style.format(5)
-
-        msg = "Expected a template string or callable, got True instead"
-        with pytest.raises(TypeError, match=msg):
-            df.style.format(True)
+    @pytest.mark.parametrize("formatter", [5, True, [2.0]])  # non-str, non-callable
+    def test_display_format_raises(self, formatter):
+        with pytest.raises(TypeError, match="expected str or callable"):
+            self.df.style.format(formatter)
 
     def test_display_set_precision(self):
         # Issue #13257
@@ -734,7 +757,7 @@ def test_display_set_precision(self):
         assert ctx["body"][1][1]["display_value"] == "3.212"
         assert ctx["body"][1][2]["display_value"] == "4.566"
 
-    def test_display_subset(self):
+    def test_format_subset(self):
         df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"])
         ctx = df.style.format(
             {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :]