Skip to content

PERF: Improve Styler to_excel Performance #47371

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jun 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions asv_bench/benchmarks/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,25 @@ def time_write_excel(self, engine):
writer.save()


class WriteExcelStyled:
    """Benchmark exporting a Styler with several CSS rules to Excel.

    The benchmark is parametrized over the Excel writer engine.
    """

    params = ["openpyxl", "xlsxwriter"]
    param_names = ["engine"]

    def setup(self, engine):
        # Create the DataFrame whose styled form is exported below.
        self.df = _generate_dataframe()

    def time_write_excel_style(self, engine):
        """Style the frame and write it to an in-memory workbook."""
        buffer = BytesIO()
        buffer.seek(0)
        excel_writer = ExcelWriter(buffer, engine=engine)
        # Two frame-wide rules plus one restricted to the "float1" column;
        # the calls are chained on the single Styler taken from the frame.
        styler = (
            self.df.style.applymap(lambda x: "border: red 1px solid;")
            .applymap(lambda x: "color: blue")
            .applymap(lambda x: "border-color: green black", subset=["float1"])
        )
        styler.to_excel(excel_writer, sheet_name="Sheet1")
        excel_writer.save()


class ReadExcel:

params = ["xlrd", "openpyxl", "odf"]
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,7 @@ Performance improvements
- Performance improvement in :func:`factorize` (:issue:`46109`)
- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
- Performance improvement in :func:`read_excel` when ``nrows`` argument provided (:issue:`32727`)
- Performance improvement in :meth:`.Styler.to_excel` when applying repeated CSS formats (:issue:`47371`)
- Performance improvement in :meth:`MultiIndex.is_monotonic_increasing` (:issue:`47458`)

.. ---------------------------------------------------------------------------
Expand Down
55 changes: 31 additions & 24 deletions pandas/io/formats/css.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import (
Callable,
Generator,
Iterable,
Iterator,
)
import warnings
Expand Down Expand Up @@ -188,18 +189,35 @@ class CSSResolver:

SIDES = ("top", "right", "bottom", "left")

# Lookup table from a CSS shorthand property name to the expander callable
# built for it; keys are lower-case property names.
CSS_EXPANSIONS = {
    # "border" itself followed by its four per-side shorthands.
    **{
        (f"border-{side}" if side else "border"): _border_expander(side)
        for side in ("", "top", "right", "bottom", "left")
    },
    # "border-color", "border-style" and "border-width".
    **{
        f"border-{attr}": _side_expander("border-{:s}-" + attr)
        for attr in ("color", "style", "width")
    },
    "margin": _side_expander("margin-{:s}"),
    "padding": _side_expander("padding-{:s}"),
}

def __call__(
self,
declarations_str: str,
declarations: str | Iterable[tuple[str, str]],
inherited: dict[str, str] | None = None,
) -> dict[str, str]:
"""
Resolve the given declarations to atomic properties.

Parameters
----------
declarations_str : str
A list of CSS declarations
declarations : str | Iterable[tuple[str, str]]
A CSS string or set of CSS declaration tuples
e.g. "font-weight: bold; background: blue" or
{("font-weight", "bold"), ("background", "blue")}
inherited : dict, optional
Atomic properties indicating the inherited style context in which
declarations is to be resolved. ``inherited`` should already
Expand Down Expand Up @@ -230,7 +248,9 @@ def __call__(
('font-size', '24pt'),
('font-weight', 'bold')]
"""
props = dict(self.atomize(self.parse(declarations_str)))
if isinstance(declarations, str):
declarations = self.parse(declarations)
props = dict(self.atomize(declarations))
if inherited is None:
inherited = {}

Expand Down Expand Up @@ -347,28 +367,15 @@ def _error():
size_fmt = f"{val:f}pt"
return size_fmt

def atomize(self, declarations) -> Generator[tuple[str, str], None, None]:
def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]:
for prop, value in declarations:
attr = "expand_" + prop.replace("-", "_")
try:
expand = getattr(self, attr)
except AttributeError:
yield prop, value
prop = prop.lower()
value = value.lower()
if prop in self.CSS_EXPANSIONS:
expand = self.CSS_EXPANSIONS[prop]
yield from expand(self, prop, value)
else:
for prop, value in expand(prop, value):
yield prop, value

expand_border = _border_expander()
expand_border_top = _border_expander("top")
expand_border_right = _border_expander("right")
expand_border_bottom = _border_expander("bottom")
expand_border_left = _border_expander("left")

expand_border_color = _side_expander("border-{:s}-color")
expand_border_style = _side_expander("border-{:s}-style")
expand_border_width = _side_expander("border-{:s}-width")
expand_margin = _side_expander("margin-{:s}")
expand_padding = _side_expander("padding-{:s}")
yield prop, value

def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
"""
Expand Down
31 changes: 20 additions & 11 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
"""
from __future__ import annotations

from functools import reduce
from functools import (
lru_cache,
reduce,
)
import itertools
import re
from typing import (
Expand Down Expand Up @@ -85,10 +88,13 @@ def __init__(
**kwargs,
) -> None:
if css_styles and css_converter:
css = ";".join(
[a + ":" + str(v) for (a, v) in css_styles[css_row, css_col]]
)
style = css_converter(css)
# Use dict to get only one (case-insensitive) declaration per property
declaration_dict = {
prop.lower(): val for prop, val in css_styles[css_row, css_col]
}
# Convert to frozenset for order-invariant caching
unique_declarations = frozenset(declaration_dict.items())
style = css_converter(unique_declarations)

return super().__init__(row=row, col=col, val=val, style=style, **kwargs)

Expand Down Expand Up @@ -166,24 +172,27 @@ def __init__(self, inherited: str | None = None) -> None:

compute_css = CSSResolver()

def __call__(self, declarations_str: str) -> dict[str, dict[str, str]]:
@lru_cache(maxsize=None)
def __call__(
self, declarations: str | frozenset[tuple[str, str]]
) -> dict[str, dict[str, str]]:
"""
Convert CSS declarations to ExcelWriter style.

Parameters
----------
declarations_str : str
List of CSS declarations.
e.g. "font-weight: bold; background: blue"
declarations : str | frozenset[tuple[str, str]]
CSS string or set of CSS declaration tuples.
e.g. "font-weight: bold; background: blue" or
{("font-weight", "bold"), ("background", "blue")}

Returns
-------
xlstyle : dict
A style as interpreted by ExcelWriter when found in
ExcelCell.style.
"""
# TODO: memoize?
properties = self.compute_css(declarations_str, self.inherited)
properties = self.compute_css(declarations, self.inherited)
return self.build_xlstyle(properties)

def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]:
Expand Down
91 changes: 90 additions & 1 deletion pandas/tests/io/formats/test_to_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
import pandas._testing as tm

from pandas.io.formats.css import CSSWarning
from pandas.io.formats.excel import CSSToExcelConverter
from pandas.io.formats.excel import (
CssExcelCell,
CSSToExcelConverter,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -340,3 +343,89 @@ def test_css_named_colors_from_mpl_present():
pd_colors = CSSToExcelConverter.NAMED_COLORS
for name, color in mpl_colors.items():
assert name in pd_colors and pd_colors[name] == color[1:]


@pytest.mark.parametrize(
    "styles,expected",
    [
        ([("color", "green"), ("color", "red")], "color: red;"),
        ([("font-weight", "bold"), ("font-weight", "normal")], "font-weight: normal;"),
        ([("text-align", "center"), ("TEXT-ALIGN", "right")], "text-align: right;"),
    ],
)
def test_css_excel_cell_precedence(styles, expected):
    """A later declaration of a property wins over an earlier one.

    Property names that differ only by case are treated as the same
    property (see the ``TEXT-ALIGN`` case).
    """
    # See GH 47371
    css_converter = CSSToExcelConverter()
    # Clear the converter cache before building the cell.
    css_converter.__call__.cache_clear()
    styled_cell = CssExcelCell(
        row=0,
        col=0,
        val="",
        style=None,
        css_styles={(0, 0): styles},
        css_row=0,
        css_col=0,
        css_converter=css_converter,
    )
    # Clear again so the expected style below is computed from scratch.
    css_converter.__call__.cache_clear()

    expected_style = css_converter(expected)
    assert styled_cell.style == expected_style


@pytest.mark.parametrize(
    "styles,cache_hits,cache_misses",
    [
        ([[("color", "green"), ("color", "red"), ("color", "green")]], 0, 1),
        (
            [
                [("font-weight", "bold")],
                [("font-weight", "normal"), ("font-weight", "bold")],
            ],
            1,
            1,
        ),
        ([[("text-align", "center")], [("TEXT-ALIGN", "center")]], 1, 1),
        (
            [
                [("font-weight", "bold"), ("text-align", "center")],
                [("font-weight", "bold"), ("text-align", "left")],
            ],
            0,
            2,
        ),
        (
            [
                [("font-weight", "bold"), ("text-align", "center")],
                [("font-weight", "bold"), ("text-align", "left")],
                [("font-weight", "bold"), ("text-align", "center")],
            ],
            1,
            2,
        ),
    ],
)
def test_css_excel_cell_cache(styles, cache_hits, cache_misses):
    """Building cells records the expected converter cache hits and misses."""
    # See GH 47371
    css_converter = CSSToExcelConverter()
    css_converter.__call__.cache_clear()

    # One declaration list per column of row 0.
    css_styles = dict(((0, col), decls) for col, decls in enumerate(styles))
    for position in css_styles:
        css_row, css_col = position
        CssExcelCell(
            row=0,
            col=0,
            val="",
            style=None,
            css_styles=css_styles,
            css_row=css_row,
            css_col=css_col,
            css_converter=css_converter,
        )
    # Snapshot the counters before resetting the cache.
    stats = css_converter.__call__.cache_info()
    css_converter.__call__.cache_clear()

    assert stats.hits == cache_hits
    assert stats.misses == cache_misses