diff --git a/ci/code_checks.sh b/ci/code_checks.sh index a9967dcb8efe6..63e5d20160dd2 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -797,8 +797,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.io.formats.style.Styler.clear SA01" \ -i "pandas.io.formats.style.Styler.concat RT03,SA01" \ -i "pandas.io.formats.style.Styler.export RT03" \ - -i "pandas.io.formats.style.Styler.format RT03" \ - -i "pandas.io.formats.style.Styler.format_index RT03" \ -i "pandas.io.formats.style.Styler.from_custom_template SA01" \ -i "pandas.io.formats.style.Styler.hide RT03,SA01" \ -i "pandas.io.formats.style.Styler.highlight_between RT03" \ @@ -808,7 +806,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.io.formats.style.Styler.highlight_quantile RT03" \ -i "pandas.io.formats.style.Styler.map RT03" \ -i "pandas.io.formats.style.Styler.map_index RT03" \ - -i "pandas.io.formats.style.Styler.relabel_index RT03" \ -i "pandas.io.formats.style.Styler.set_caption RT03,SA01" \ -i "pandas.io.formats.style.Styler.set_properties RT03,SA01" \ -i "pandas.io.formats.style.Styler.set_sticky RT03,SA01" \ diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 2256876c93e01..0e1d93841d52f 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -41,6 +41,7 @@ Style application Styler.map_index Styler.format Styler.format_index + Styler.format_index_names Styler.relabel_index Styler.hide Styler.concat diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a398b93b60018..878eb4e79a1d5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -34,6 +34,8 @@ Other enhancements - Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`) - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`) - Users can globally disable any 
``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) +- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) +- .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 7247e11be874e..ab5f1c039b7ca 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1683,6 +1683,8 @@ def _copy(self, deepcopy: bool = False) -> Styler: "_display_funcs", "_display_funcs_index", "_display_funcs_columns", + "_display_funcs_index_names", + "_display_funcs_column_names", "hidden_rows", "hidden_columns", "ctx", diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 2c93dbe74eace..92afbc0e150ef 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -140,9 +140,15 @@ def __init__( self._display_funcs_index: DefaultDict[ # maps (row, level) -> format func tuple[int, int], Callable[[Any], str] ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) + self._display_funcs_index_names: DefaultDict[ # maps index level -> format func + int, Callable[[Any], str] + ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) self._display_funcs_columns: DefaultDict[ # maps (level, col) -> format func tuple[int, int], Callable[[Any], str] ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) + self._display_funcs_column_names: DefaultDict[ # maps col level -> format func + int, Callable[[Any], str] + ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) def _render( self, @@ -460,6 +466,12 @@ def _generate_col_header_row( ] * (self.index.nlevels - sum(self.hide_index_) - 1) name = self.data.columns.names[r] + + is_display = name is not None and not self.hide_column_names + value = name if 
is_display else self.css["blank_value"] + display_value = ( + self._display_funcs_column_names[r](value) if is_display else None + ) column_name = [ _element( "th", @@ -468,10 +480,9 @@ def _generate_col_header_row( if name is None else f"{self.css['index_name']} {self.css['level']}{r}" ), - name - if (name is not None and not self.hide_column_names) - else self.css["blank_value"], + value, not all(self.hide_index_), + display_value=display_value, ) ] @@ -553,6 +564,9 @@ def _generate_index_names_row( f"{self.css['index_name']} {self.css['level']}{c}", self.css["blank_value"] if name is None else name, not self.hide_index_[c], + display_value=( + None if name is None else self._display_funcs_index_names[c](name) + ), ) for c, name in enumerate(self.data.index.names) ] @@ -1005,6 +1019,7 @@ def format( Returns ------- Styler + Returns itself for chaining. See Also -------- @@ -1261,6 +1276,7 @@ def format_index( Returns ------- Styler + Returns itself for chaining. See Also -------- @@ -1425,6 +1441,7 @@ def relabel_index( Returns ------- Styler + Returns itself for chaining. See Also -------- @@ -1560,6 +1577,140 @@ def alias_(x, value): return self + def format_index_names( + self, + formatter: ExtFormatter | None = None, + axis: Axis = 0, + level: Level | list[Level] | None = None, + na_rep: str | None = None, + precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, + escape: str | None = None, + hyperlinks: str | None = None, + ) -> StylerRenderer: + r""" + Format the text display value of index names or column names. + + .. versionadded:: 3.0 + + Parameters + ---------- + formatter : str, callable, dict or None + Object to define how values are displayed. See notes. + axis : {0, "index", 1, "columns"} + Whether to apply the formatter to the index or column headers. + level : int, str, list + The level(s) over which to apply the generic formatter. + na_rep : str, optional + Representation for missing values. 
+ If ``na_rep`` is None, no special formatting is applied. + precision : int, optional + Floating point precision to use for display purposes, if not determined by + the specified ``formatter``. + decimal : str, default "." + Character used as decimal separator for floats, complex and integers. + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers. + escape : str, optional + Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` + in cell display string with HTML-safe sequences. + Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, + ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with + LaTeX-safe sequences. + Escaping is done before ``formatter``. + hyperlinks : {"html", "latex"}, optional + Convert string patterns containing https://, http://, ftp:// or www. to + HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href + commands if "latex". + + Returns + ------- + Styler + Returns itself for chaining. + + Raises + ------ + ValueError + If the `formatter` is a string and the dtypes are incompatible. + + See Also + -------- + Styler.format_index: Format the text display value of index labels + or column headers. + + Notes + ----- + This method has a similar signature to :meth:`Styler.format_index`. Since + `names` are generally label based, and often not numeric, the typical features + expected to be more frequently used here are ``escape`` and ``hyperlinks``. + + .. warning:: + `Styler.format_index_names` is ignored when using the output format + `Styler.to_excel`, since Excel and Python have inherently different + formatting structures. + + Examples + -------- + >>> df = pd.DataFrame( + ... [[1, 2], [3, 4]], + ... index=pd.Index(["a", "b"], name="idx"), + ...
) + >>> df # doctest: +SKIP + 0 1 + idx + a 1 2 + b 3 4 + >>> df.style.format_index_names(lambda x: x.upper(), axis=0) # doctest: +SKIP + 0 1 + IDX + a 1 2 + b 3 4 + """ + axis = self.data._get_axis_number(axis) + if axis == 0: + display_funcs_, obj = self._display_funcs_index_names, self.index + else: + display_funcs_, obj = self._display_funcs_column_names, self.columns + levels_ = refactor_levels(level, obj) + + if all( + ( + formatter is None, + level is None, + precision is None, + decimal == ".", + thousands is None, + na_rep is None, + escape is None, + hyperlinks is None, + ) + ): + display_funcs_.clear() + return self # clear the formatter / revert to default and avoid looping + + if not isinstance(formatter, dict): + formatter = {level: formatter for level in levels_} + else: + formatter = { + obj._get_level_number(level): formatter_ + for level, formatter_ in formatter.items() + } + + for lvl in levels_: + format_func = _maybe_wrap_formatter( + formatter.get(lvl), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + hyperlinks=hyperlinks, + ) + display_funcs_[lvl] = format_func + + return self + def _element( html_element: str, @@ -1571,7 +1722,7 @@ def _element( """ Template to return container with information for a <td></td> or <th></th> element. 
""" - if "display_value" not in kwargs: + if "display_value" not in kwargs or kwargs["display_value"] is None: kwargs["display_value"] = value return { "type": html_element, diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 1c84816ead140..ae68fcf9ef1fc 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -32,10 +32,14 @@ def styler(df): @pytest.fixture def df_multi(): - return DataFrame( - data=np.arange(16).reshape(4, 4), - columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]), - index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]), + return ( + DataFrame( + data=np.arange(16).reshape(4, 4), + columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]), + index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]), + ) + .rename_axis(["0_0", "0_1"], axis=0) + .rename_axis(["1_0", "1_1"], axis=1) ) @@ -560,3 +564,98 @@ def test_relabel_roundtrip(styler): ctx = styler._translate(True, True) assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items() assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items() + + +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize( + "level, expected", + [ + (0, ["X", "one"]), # level int + ("zero", ["X", "one"]), # level name + (1, ["zero", "X"]), # other level int + ("one", ["zero", "X"]), # other level name + ([0, 1], ["X", "X"]), # both levels + ([0, "zero"], ["X", "one"]), # level int and name simultaneous + ([0, "one"], ["X", "X"]), # both levels as int and name + (["one", "zero"], ["X", "X"]), # both level names, reversed + ], +) +def test_format_index_names_level(axis, level, expected): + midx = MultiIndex.from_arrays([["_", "_"], ["_", "_"]], names=["zero", "one"]) + df = DataFrame([[1, 2], [3, 4]]) + if axis == 0: + df.index = midx + else: + df.columns = midx + + styler = df.style.format_index_names(lambda v: "X", level=level, axis=axis) + ctx = 
styler._translate(True, True) + + if axis == 0: # compare index + result = [ctx["head"][1][s]["display_value"] for s in range(2)] + else: # compare columns + result = [ctx["head"][s][0]["display_value"] for s in range(2)] + assert expected == result + + +@pytest.mark.parametrize( + "attr, kwargs", + [ + ("_display_funcs_index_names", {"axis": 0}), + ("_display_funcs_column_names", {"axis": 1}), + ], +) +def test_format_index_names_clear(styler, attr, kwargs): + assert 0 not in getattr(styler, attr) # using default + styler.format_index_names("{:.2f}", **kwargs) + assert 0 in getattr(styler, attr) # formatter is specified + styler.format_index_names(**kwargs) + assert 0 not in getattr(styler, attr) # formatter cleared to default + + +@pytest.mark.parametrize("axis", [0, 1]) +def test_format_index_names_callable(styler_multi, axis): + ctx = styler_multi.format_index_names( + lambda v: v.replace("_", "A"), axis=axis + )._translate(True, True) + result = [ + ctx["head"][2][0]["display_value"], + ctx["head"][2][1]["display_value"], + ctx["head"][0][1]["display_value"], + ctx["head"][1][1]["display_value"], + ] + if axis == 0: + expected = ["0A0", "0A1", "1_0", "1_1"] + else: + expected = ["0_0", "0_1", "1A0", "1A1"] + assert result == expected + + +def test_format_index_names_dict(styler_multi): + ctx = ( + styler_multi.format_index_names({"0_0": "{:<<5}"}) + .format_index_names({"1_1": "{:>>4}"}, axis=1) + ._translate(True, True) + ) + assert ctx["head"][2][0]["display_value"] == "0_0<<" + assert ctx["head"][1][1]["display_value"] == ">1_1" + + +def test_format_index_names_with_hidden_levels(styler_multi): + ctx = styler_multi._translate(True, True) + full_head_height = len(ctx["head"]) + full_head_width = len(ctx["head"][0]) + assert full_head_height == 3 + assert full_head_width == 6 + + ctx = ( + styler_multi.hide(axis=0, level=1) + .hide(axis=1, level=1) + .format_index_names("{:>>4}", axis=1) + .format_index_names("{:!<5}") + ._translate(True, True) + ) + assert 
len(ctx["head"]) == full_head_height - 1 + assert len(ctx["head"][0]) == full_head_width - 1 + assert ctx["head"][0][0]["display_value"] == ">1_0" + assert ctx["head"][1][0]["display_value"] == "0_0!!" diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 8cb06e3b7619d..2306324efb974 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -34,6 +34,16 @@ def styler_mi(): return Styler(DataFrame(np.arange(16).reshape(4, 4), index=midx, columns=midx)) +@pytest.fixture +def styler_multi(): + df = DataFrame( + data=np.arange(16).reshape(4, 4), + columns=MultiIndex.from_product([["A", "B"], ["a", "b"]], names=["A&", "b&"]), + index=MultiIndex.from_product([["X", "Y"], ["x", "y"]], names=["X>", "y_"]), + ) + return Styler(df) + + @pytest.fixture def tpl_style(env): return env.get_template("html_style.tpl") @@ -1003,3 +1013,23 @@ def test_to_html_na_rep_non_scalar_data(datapath): """ assert result == expected + + +@pytest.mark.parametrize("escape_axis_0", [True, False]) +@pytest.mark.parametrize("escape_axis_1", [True, False]) +def test_format_index_names(styler_multi, escape_axis_0, escape_axis_1): + if escape_axis_0: + styler_multi.format_index_names(axis=0, escape="html") + expected_index = ["X&gt;", "y_"] + else: + expected_index = ["X>", "y_"] + + if escape_axis_1: + styler_multi.format_index_names(axis=1, escape="html") + expected_columns = ["A&amp;", "b&amp;"] + else: + expected_columns = ["A&", "b&"] + + result = styler_multi.to_html(table_uuid="test") + for expected_str in expected_index + expected_columns: + assert f"{expected_str}</th>" in result diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 6fa72bd48031c..89addbbbc1ded 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -77,6 +77,8 @@ def mi_styler_comp(mi_styler): columns=mi_styler.columns, ) ) + 
mi_styler.format_index_names(escape="html", axis=0) + mi_styler.format_index_names(escape="html", axis=1) return mi_styler