diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index c85a087835b80..9a9362d66be88 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -38,6 +38,8 @@ Styler
 ^^^^^^
 
   - New method :meth:`.Styler.to_string` for alternative customisable output methods (:issue:`44502`)
+  - New keyword argument ``rename`` added to :meth:`.Styler.format_index` to allow simple label string replacement (:issue:`45288`)
+  - Various bug fixes, see below.
   - Added the ability to render ``border`` and ``border-{side}`` CSS properties in Excel (:issue:`42276`)
   - Added a new method :meth:`.Styler.concat` which allows adding customised footer rows to visualise additional calculations on the data, e.g. totals and counts etc. (:issue:`43875`, :issue:`46186`)
   - :meth:`.Styler.highlight_null` now accepts ``color`` consistently with other builtin methods and deprecates ``null_color`` although this remains backwards compatible (:issue:`45907`)
diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
index 4e3f86d21b228..da4ef096e7871 100644
--- a/pandas/io/formats/style_render.py
+++ b/pandas/io/formats/style_render.py
@@ -1168,6 +1168,7 @@ def format_index(
         thousands: str | None = None,
         escape: str | None = None,
         hyperlinks: str | None = None,
+        rename: list[str] | list[list[str]] | None = None,
     ) -> StylerRenderer:
         r"""
         Format the text display value of index labels or column headers.
@@ -1179,9 +1180,12 @@ def format_index(
         formatter : str, callable, dict or None
             Object to define how values are displayed. See notes.
         axis : {0, "index", 1, "columns"}
-            Whether to apply the formatter to the index or column headers.
+            Whether to apply the ``formatter`` or ``rename`` to the index or column
+            headers.
         level : int, str, list
-            The level(s) over which to apply the generic formatter.
+            The level(s) over which to apply the generic ``formatter``, or ``rename``.
+            In the case of ``rename`` defaults to the last level of a MultiIndex,
+            for the reason that the last level is never sparsified.
         na_rep : str, optional
             Representation for missing values.
             If ``na_rep`` is None, no special formatting is applied.
@@ -1203,6 +1207,17 @@ def format_index(
             Convert string patterns containing https://, http://, ftp:// or www. to
             HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
             commands if "latex".
+        rename : list of str or list of list of str, optional
+            Values to replace the existing index or column headers. If specifying
+            more than one ``level`` then this should be a list containing sub-lists for
+            each identified level, in the respective order.
+            Cannot be used simultaneously with ``formatter`` and the associated
+            arguments: ``thousands``, ``decimal``, ``escape``, ``hyperlinks``,
+            ``na_rep`` and ``precision``.
+            This list (or each sub-list) must be of length equal to the number of
+            visible labels along ``axis``, see examples.
+
+            .. versionadded:: 1.5.0
 
         Returns
         -------
@@ -1238,6 +1253,10 @@ def format_index(
         When using a ``formatter`` string the dtypes must be compatible, otherwise a
         `ValueError` will be raised.
 
+        Since it is not possible to apply a generic function which will return an
+        arbitrary set of column aliases, the argument ``rename`` provides the
+        ability to automate this, across individual index levels if necessary.
+
         .. warning::
            `Styler.format_index` is ignored when using the output format
            `Styler.to_excel`, since Excel and Python have inherrently different
@@ -1298,18 +1317,42 @@ def format_index(
         {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\
         0 & 1 & 2 & 3 \\
         \end{tabular}
+
+        Using ``rename`` to overwrite column names.
+
+        >>> df = pd.DataFrame([[1, 2, 3]], columns=[1, 2, 3])
+        >>> df.style.format_index(axis=1, rename=["A", "B", "C"])  # doctest: +SKIP
+             A    B    C
+        0    1    2    3
+
+        Using ``rename`` to overwrite column names of remaining **visible** items.
+
+        >>> df = pd.DataFrame([[1, 2, 3]],
+        ...                   columns=pd.MultiIndex.from_product([[1, 2, 3], ["X"]]))
+        >>> styler = df.style
+        >>> styler  # doctest: +SKIP
+             1    2    3
+             X    X    X
+        0    1    2    3
+
+        >>> (styler.hide([2], axis=1)  # hides a column as a `subset` hide
+        ...     .hide(level=1, axis=1)  # hides the entire axis level
+        ...     .format_index(axis=1, rename=["A", "C"], level=0))  # doctest: +SKIP
+             A    C
+        0    1    3
         """
         axis = self.data._get_axis_number(axis)
         if axis == 0:
             display_funcs_, obj = self._display_funcs_index, self.index
+            hidden_labels = self.hidden_rows
         else:
             display_funcs_, obj = self._display_funcs_columns, self.columns
+            hidden_labels = self.hidden_columns
         levels_ = refactor_levels(level, obj)
 
-        if all(
+        formatting_args_unset = all(
             (
                 formatter is None,
-                level is None,
                 precision is None,
                 decimal == ".",
                 thousands is None,
@@ -1317,31 +1360,77 @@ def format_index(
                 escape is None,
                 hyperlinks is None,
             )
-        ):
+        )
+
+        if formatting_args_unset and level is None and rename is None:
+            # clear the formatter / revert to default and avoid looping
             display_funcs_.clear()
-            return self  # clear the formatter / revert to default and avoid looping
 
-        if not isinstance(formatter, dict):
-            formatter = {level: formatter for level in levels_}
-        else:
-            formatter = {
-                obj._get_level_number(level): formatter_
-                for level, formatter_ in formatter.items()
-            }
+        elif rename is not None:  # then apply a formatting function from arg: rename
+            if not formatting_args_unset:
+                raise ValueError(
+                    "``rename`` cannot be supplied together with any of "
+                    "``formatter``, ``precision``, ``decimal``, ``thousands``, "
+                    "``na_rep``, ``escape``, or ``hyperlinks``."
+                )
+            else:
+                visible_len = len(obj) - len(set(hidden_labels))
+                if level is None:
+                    levels_ = [obj.nlevels - 1]  # default to last level
+                elif len(levels_) > 1 and len(rename) != len(levels_):
+                    raise ValueError(
+                        f"``level`` specifies {len(levels_)} levels but the length of "
+                        f"``rename``, {len(rename)}, does not match."
+                    )
 
-        for lvl in levels_:
-            format_func = _maybe_wrap_formatter(
-                formatter.get(lvl),
-                na_rep=na_rep,
-                precision=precision,
-                decimal=decimal,
-                thousands=thousands,
-                escape=escape,
-                hyperlinks=hyperlinks,
-            )
+                def alias_(x, value):
+                    # ignore the original label ``x`` and always display the alias
+                    return value
+
+                for i, lvl in enumerate(levels_):
+                    level_alias = rename[i] if len(levels_) > 1 else rename
+                    # a bare str means no per-level sub-list was given; never index it
+                    if isinstance(level_alias, str) or len(level_alias) != visible_len:
+                        raise ValueError(
+                            "``rename`` must be of length equal to the number of "
+                            "visible labels along ``axis``. If ``level`` is given and "
+                            "contains more than one level ``rename`` should be a "
+                            "list of lists with each sub-list having length equal to "
+                            "the number of visible labels along ``axis``."
+                        )
+                    for ai, idx in enumerate(
+                        [
+                            (pos, lvl) if axis == 0 else (lvl, pos)
+                            for pos in range(len(obj))
+                            if pos not in hidden_labels
+                        ]
+                    ):
+                        display_funcs_[idx] = partial(alias_, value=level_alias[ai])
+
+        else:  # then apply a formatting function from arg: formatter
+            if not isinstance(formatter, dict):
+                formatter = {level: formatter for level in levels_}
+            else:
+                formatter = {
+                    obj._get_level_number(level): formatter_
+                    for level, formatter_ in formatter.items()
+                }
+
+            for lvl in levels_:
+                format_func = _maybe_wrap_formatter(
+                    formatter.get(lvl),
+                    na_rep=na_rep,
+                    precision=precision,
+                    decimal=decimal,
+                    thousands=thousands,
+                    escape=escape,
+                    hyperlinks=hyperlinks,
+                )
 
-            for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
-                display_funcs_[idx] = format_func
+                for idx in [
+                    (i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))
+                ]:
+                    display_funcs_[idx] = format_func
 
         return self
 
diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py
index a52c679e16ad5..40b47441c26ff 100644
--- a/pandas/tests/io/formats/style/test_format.py
+++ b/pandas/tests/io/formats/style/test_format.py
@@ -442,3 +442,83 @@ def test_boolean_format():
     ctx = df.style._translate(True, True)
     assert ctx["body"][0][1]["display_value"] is True
     assert ctx["body"][0][2]["display_value"] is False
+
+
+def test_basic_rename(styler):
+    styler.format_index(axis=1, rename=["alias1", "alias2"])
+    ctx = styler._translate(True, True)
+    assert ctx["head"][0][1]["value"] == "A"
+    assert ctx["head"][0][1]["display_value"] == "alias1"  # alias
+    assert ctx["head"][0][2]["value"] == "B"
+    assert ctx["head"][0][2]["display_value"] == "alias2"  # alias
+
+
+def test_basic_rename_hidden_column(styler):
+    styler.hide(subset="A", axis=1)
+    styler.format_index(axis=1, rename=["alias2"])
+    ctx = styler._translate(True, True)
+    assert ctx["head"][0][1]["value"] == "A"
+    assert ctx["head"][0][1]["display_value"] == "A"  # no alias for hidden
+    assert ctx["head"][0][2]["value"] == "B"
+    assert ctx["head"][0][2]["display_value"] == "alias2"  # alias
+
+
+@pytest.mark.parametrize("level", [None, 0, 1])
+def test_rename_single_levels(df, level):
+    df.columns = MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
+    styler = Styler(df, cell_ids=False, uuid_len=0)
+    styler.format_index(axis=1, level=level, rename=["alias1", "alias2"])
+    ctx = styler._translate(True, True)
+    assert len(ctx["head"]) == 2  # MultiIndex levels
+
+    level = 1 if level is None else level  # defaults to last
+    assert f"level{level}" in ctx["head"][level][1]["class"]
+    assert ctx["head"][level][1]["display_value"] == "alias1"
+    assert ctx["head"][level][2]["display_value"] == "alias2"
+
+
+@pytest.mark.parametrize("level", [[0, 1], [1, 0]])
+def test_rename_multi_levels_order(df, level):
+    df.columns = MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
+    styler = Styler(df, cell_ids=False, uuid_len=0)
+    styler.format_index(axis=1, level=level, rename=[["a1", "a2"], ["b1", "b2"]])
+    ctx = styler._translate(True, True)
+
+    assert ctx["head"][level[0]][1]["display_value"] == "a1"
+    assert ctx["head"][level[0]][2]["display_value"] == "a2"
+    assert ctx["head"][level[1]][1]["display_value"] == "b1"
+    assert ctx["head"][level[1]][2]["display_value"] == "b2"
+
+
+@pytest.mark.parametrize(
+    "level, rename",
+    [
+        ([0, 1], ["alias1", "alias2"]),  # no sublists
+        ([0, 1], ["a1", "a2"]),  # no sublists, str length equals visible labels
+        ([0], ["alias1"]),  # too short
+        (None, ["alias1", "alias2", "alias3"]),  # too long
+        ([0, 1], [["alias1", "alias2"], ["alias1"]]),  # sublist too short
+        ([0, 1], [["a1", "a2"], ["a1", "a2", "a3"]]),  # sublist too long
+    ],
+)
+def test_rename_warning(df, level, rename):
+    df.columns = MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
+    styler = Styler(df, cell_ids=False, uuid_len=0)
+    msg = "``rename`` must be of length equal to"
+    with pytest.raises(ValueError, match=msg):
+        styler.format_index(axis=1, level=level, rename=rename)
+
+
+@pytest.mark.parametrize(
+    "level, rename",
+    [
+        ([0, 1], [["a1", "a2"]]),  # too few sublists
+        ([0, 1], [["a1", "a2"], ["a1", "a2"], ["a1", "a2"]]),  # too many sublists
+    ],
+)
+def test_rename_warning2(df, level, rename):
+    df.columns = MultiIndex.from_tuples([("X", "A"), ("Y", "B")])
+    styler = Styler(df, cell_ids=False, uuid_len=0)
+    msg = "``level`` specifies 2 levels but the length of"
+    with pytest.raises(ValueError, match=msg):
+        styler.format_index(axis=1, level=level, rename=rename)