Skip to content

ENH: add ability to format index and col names to Styler #57880

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -797,8 +797,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.io.formats.style.Styler.clear SA01" \
-i "pandas.io.formats.style.Styler.concat RT03,SA01" \
-i "pandas.io.formats.style.Styler.export RT03" \
-i "pandas.io.formats.style.Styler.format RT03" \
-i "pandas.io.formats.style.Styler.format_index RT03" \
-i "pandas.io.formats.style.Styler.from_custom_template SA01" \
-i "pandas.io.formats.style.Styler.hide RT03,SA01" \
-i "pandas.io.formats.style.Styler.highlight_between RT03" \
Expand All @@ -808,7 +806,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.io.formats.style.Styler.highlight_quantile RT03" \
-i "pandas.io.formats.style.Styler.map RT03" \
-i "pandas.io.formats.style.Styler.map_index RT03" \
-i "pandas.io.formats.style.Styler.relabel_index RT03" \
-i "pandas.io.formats.style.Styler.set_caption RT03,SA01" \
-i "pandas.io.formats.style.Styler.set_properties RT03,SA01" \
-i "pandas.io.formats.style.Styler.set_sticky RT03,SA01" \
Expand Down
1 change: 1 addition & 0 deletions doc/source/reference/style.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Style application
Styler.map_index
Styler.format
Styler.format_index
Styler.format_index_names
Styler.relabel_index
Styler.hide
Styler.concat
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ Other enhancements
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_300.notable_bug_fixes:
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,8 @@ def _copy(self, deepcopy: bool = False) -> Styler:
"_display_funcs",
"_display_funcs_index",
"_display_funcs_columns",
"_display_funcs_index_names",
"_display_funcs_column_names",
"hidden_rows",
"hidden_columns",
"ctx",
Expand Down
159 changes: 155 additions & 4 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,15 @@ def __init__(
self._display_funcs_index: DefaultDict[ # maps (row, level) -> format func
tuple[int, int], Callable[[Any], str]
] = defaultdict(lambda: partial(_default_formatter, precision=precision))
self._display_funcs_index_names: DefaultDict[ # maps index level -> format func
int, Callable[[Any], str]
] = defaultdict(lambda: partial(_default_formatter, precision=precision))
self._display_funcs_columns: DefaultDict[ # maps (level, col) -> format func
tuple[int, int], Callable[[Any], str]
] = defaultdict(lambda: partial(_default_formatter, precision=precision))
self._display_funcs_column_names: DefaultDict[ # maps col level -> format func
int, Callable[[Any], str]
] = defaultdict(lambda: partial(_default_formatter, precision=precision))

def _render(
self,
Expand Down Expand Up @@ -460,6 +466,12 @@ def _generate_col_header_row(
] * (self.index.nlevels - sum(self.hide_index_) - 1)

name = self.data.columns.names[r]

is_display = name is not None and not self.hide_column_names
value = name if is_display else self.css["blank_value"]
display_value = (
self._display_funcs_column_names[r](value) if is_display else None
)
column_name = [
_element(
"th",
Expand All @@ -468,10 +480,9 @@ def _generate_col_header_row(
if name is None
else f"{self.css['index_name']} {self.css['level']}{r}"
),
name
if (name is not None and not self.hide_column_names)
else self.css["blank_value"],
value,
not all(self.hide_index_),
display_value=display_value,
)
]

Expand Down Expand Up @@ -553,6 +564,9 @@ def _generate_index_names_row(
f"{self.css['index_name']} {self.css['level']}{c}",
self.css["blank_value"] if name is None else name,
not self.hide_index_[c],
display_value=(
None if name is None else self._display_funcs_index_names[c](name)
),
)
for c, name in enumerate(self.data.index.names)
]
Expand Down Expand Up @@ -1005,6 +1019,7 @@ def format(
Returns
-------
Styler
Returns itself for chaining.

See Also
--------
Expand Down Expand Up @@ -1261,6 +1276,7 @@ def format_index(
Returns
-------
Styler
Returns itself for chaining.

See Also
--------
Expand Down Expand Up @@ -1425,6 +1441,7 @@ def relabel_index(
Returns
-------
Styler
Returns itself for chaining.

See Also
--------
Expand Down Expand Up @@ -1560,6 +1577,140 @@ def alias_(x, value):

return self

def format_index_names(
    self,
    formatter: ExtFormatter | None = None,
    axis: Axis = 0,
    level: Level | list[Level] | None = None,
    na_rep: str | None = None,
    precision: int | None = None,
    decimal: str = ".",
    thousands: str | None = None,
    escape: str | None = None,
    hyperlinks: str | None = None,
) -> StylerRenderer:
    r"""
    Format the text display value of index names or column names.

    .. versionadded:: 3.0

    Parameters
    ----------
    formatter : str, callable, dict or None
        Object to define how values are displayed. See notes.
    axis : {0, "index", 1, "columns"}
        Whether to apply the formatter to the index or column headers.
    level : int, str, list, optional
        The level(s) over which to apply the generic formatter.
    na_rep : str, optional
        Representation for missing values.
        If ``na_rep`` is None, no special formatting is applied.
    precision : int, optional
        Floating point precision to use for display purposes, if not determined by
        the specified ``formatter``.
    decimal : str, default "."
        Character used as decimal separator for floats, complex and integers.
    thousands : str, optional, default None
        Character used as thousands separator for floats, complex and integers.
    escape : str, optional
        Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
        in cell display string with HTML-safe sequences.
        Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
        ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
        LaTeX-safe sequences.
        Escaping is done before ``formatter``.
    hyperlinks : {"html", "latex"}, optional
        Convert string patterns containing https://, http://, ftp:// or www. to
        HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
        commands if "latex".

    Returns
    -------
    Styler
        Returns itself for chaining.

    Raises
    ------
    ValueError
        If the `formatter` is a string and the dtypes are incompatible.

    See Also
    --------
    Styler.format_index: Format the text display value of index labels
        or column headers.

    Notes
    -----
    This method has a similar signature to :meth:`Styler.format_index`. Since
    `names` are generally label based, and often not numeric, the typical features
    expected to be more frequently used here are ``escape`` and ``hyperlinks``.

    Calling this method with every argument left at its default clears all
    name formatters previously stored for the chosen ``axis``.

    .. warning::
        `Styler.format_index_names` is ignored when using the output format
        `Styler.to_excel`, since Excel and Python have inherently different
        formatting structures.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     [[1, 2], [3, 4]],
    ...     index=pd.Index(["a", "b"], name="idx"),
    ... )
    >>> df  # doctest: +SKIP
         0  1
    idx
    a    1  2
    b    3  4
    >>> df.style.format_index_names(lambda x: x.upper(), axis=0)  # doctest: +SKIP
         0  1
    IDX
    a    1  2
    b    3  4
    """
    axis = self.data._get_axis_number(axis)
    if axis == 0:
        display_funcs_, obj = self._display_funcs_index_names, self.index
    else:
        display_funcs_, obj = self._display_funcs_column_names, self.columns
    levels_ = refactor_levels(level, obj)

    if all(
        (
            formatter is None,
            level is None,
            precision is None,
            decimal == ".",
            thousands is None,
            na_rep is None,
            escape is None,
            hyperlinks is None,
        )
    ):
        display_funcs_.clear()
        return self  # clear the formatter / revert to default and avoid looping

    if isinstance(formatter, dict):
        # Dict keys are passed through ``_get_level_number`` so they match
        # the entries of ``levels_`` used for the lookup below.
        formatter = {
            obj._get_level_number(key): formatter_
            for key, formatter_ in formatter.items()
        }
    else:
        # A single formatter (or None) applies to every selected level.
        formatter = dict.fromkeys(levels_, formatter)

    for lvl in levels_:
        # Levels missing from a dict ``formatter`` yield None from ``.get``
        # and are still wrapped with the remaining keyword options.
        display_funcs_[lvl] = _maybe_wrap_formatter(
            formatter.get(lvl),
            na_rep=na_rep,
            precision=precision,
            decimal=decimal,
            thousands=thousands,
            escape=escape,
            hyperlinks=hyperlinks,
        )

    return self


def _element(
html_element: str,
Expand All @@ -1571,7 +1722,7 @@ def _element(
"""
Template to return container with information for a <td></td> or <th></th> element.
"""
if "display_value" not in kwargs:
if "display_value" not in kwargs or kwargs["display_value"] is None:
kwargs["display_value"] = value
return {
"type": html_element,
Expand Down
107 changes: 103 additions & 4 deletions pandas/tests/io/formats/style/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@ def styler(df):

@pytest.fixture
def df_multi():
return DataFrame(
data=np.arange(16).reshape(4, 4),
columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]),
index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]),
return (
DataFrame(
data=np.arange(16).reshape(4, 4),
columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]),
index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]),
)
.rename_axis(["0_0", "0_1"], axis=0)
.rename_axis(["1_0", "1_1"], axis=1)
)


Expand Down Expand Up @@ -560,3 +564,98 @@ def test_relabel_roundtrip(styler):
ctx = styler._translate(True, True)
assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items()
assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items()


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "level, expected",
    [
        (0, ["X", "one"]),  # level int
        ("zero", ["X", "one"]),  # level name
        (1, ["zero", "X"]),  # other level int
        ("one", ["zero", "X"]),  # other level name
        ([0, 1], ["X", "X"]),  # both levels
        ([0, "zero"], ["X", "one"]),  # level int and name simultaneous
        ([0, "one"], ["X", "X"]),  # both levels as int and name
        (["one", "zero"], ["X", "X"]),  # both level names, reversed
    ],
)
def test_format_index_names_level(axis, level, expected):
    """Only the names of the levels selected via ``level`` are replaced by "X"."""
    named_levels = MultiIndex.from_arrays(
        [["_", "_"], ["_", "_"]], names=["zero", "one"]
    )
    frame = DataFrame([[1, 2], [3, 4]])
    if axis == 0:
        frame.index = named_levels
    else:
        frame.columns = named_levels

    head = frame.style.format_index_names(
        lambda v: "X", level=level, axis=axis
    )._translate(True, True)["head"]

    if axis == 0:
        # compare index: first two cells of header row 1
        cells = [head[1][0], head[1][1]]
    else:
        # compare columns: first cell of header rows 0 and 1
        cells = [head[0][0], head[1][0]]
    result = [cell["display_value"] for cell in cells]
    assert expected == result


@pytest.mark.parametrize(
    "attr, kwargs",
    [
        ("_display_funcs_index_names", {"axis": 0}),
        ("_display_funcs_column_names", {"axis": 1}),
    ],
)
def test_format_index_names_clear(styler, attr, kwargs):
    """A call with no formatting arguments removes a previously set formatter."""

    def level_zero_is_set():
        # Membership test only: indexing the mapping is deliberately avoided.
        return 0 in getattr(styler, attr)

    assert not level_zero_is_set()  # using default

    styler.format_index_names("{:.2f}", **kwargs)
    assert level_zero_is_set()  # formatter is specified

    styler.format_index_names(**kwargs)
    assert not level_zero_is_set()  # formatter cleared to default


@pytest.mark.parametrize("axis", [0, 1])
def test_format_index_names_callable(styler_multi, axis):
    """A callable formatter changes the names on the requested axis only."""
    head = styler_multi.format_index_names(
        lambda v: v.replace("_", "A"), axis=axis
    )._translate(True, True)["head"]

    # (row, col) positions of the four header cells under inspection
    positions = [(2, 0), (2, 1), (0, 1), (1, 1)]
    result = [head[row][col]["display_value"] for row, col in positions]

    expected_by_axis = {
        0: ["0A0", "0A1", "1_0", "1_1"],
        1: ["0_0", "0_1", "1A0", "1A1"],
    }
    assert result == expected_by_axis[axis]


def test_format_index_names_dict(styler_multi):
    """Dict formatters keyed by level name apply to the matching level."""
    # ``format_index_names`` returns the styler itself, so the calls need not
    # be chained.
    styler_multi.format_index_names({"0_0": "{:<<5}"})
    styler_multi.format_index_names({"1_1": "{:>>4}"}, axis=1)
    head = styler_multi._translate(True, True)["head"]

    assert head[2][0]["display_value"] == "0_0<<"
    assert head[1][1]["display_value"] == ">1_1"


def test_format_index_names_with_hidden_levels(styler_multi):
    """Name formatting still applies after hiding one level on each axis."""
    # Baseline header dimensions before any level is hidden.
    baseline_head = styler_multi._translate(True, True)["head"]
    full_head_height = len(baseline_head)
    full_head_width = len(baseline_head[0])
    assert full_head_height == 3
    assert full_head_width == 6

    head = (
        styler_multi.hide(axis=0, level=1)
        .hide(axis=1, level=1)
        .format_index_names("{:>>4}", axis=1)
        .format_index_names("{:!<5}")
        ._translate(True, True)
    )["head"]

    # The header loses one row and one column compared to the baseline.
    assert len(head) == full_head_height - 1
    assert len(head[0]) == full_head_width - 1
    assert head[0][0]["display_value"] == ">1_0"
    assert head[1][0]["display_value"] == "0_0!!"
Loading