Skip to content

Commit ad842d3

Browse files
tehunterThomas Hunter
and
Thomas Hunter
authored
PERF: Improve Styler to_excel Performance (#47371)
* Move CSS expansion lookup to dictionary * Implement simple CSSToExcelConverter cache * Eliminate list -> str -> list in CSSResolver * Allow for resolution of duplicate properties * Add performance benchmark for styled Excel * CLN: Clean up PEP8 issues * DOC: Update PR documentation * CLN: Clean up PEP8 issues * Fixes from pre-commit [automated commit] * Make Excel CSS case-insensitive * Test for ordering and caching * Pre-commit fixes * Remove built-in filter * Increase maxsize of Excel cache Co-authored-by: Thomas Hunter <[email protected]>
1 parent 2288135 commit ad842d3

File tree

5 files changed

+161
-36
lines changed

5 files changed

+161
-36
lines changed

asv_bench/benchmarks/io/excel.py

+19
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,25 @@ def time_write_excel(self, engine):
4747
writer.save()
4848

4949

50+
class WriteExcelStyled:
    """Benchmark writing a styled DataFrame to Excel, once per engine."""

    params = ["openpyxl", "xlsxwriter"]
    param_names = ["engine"]

    def setup(self, engine):
        # Create the DataFrame that the benchmark styles and writes.
        self.df = _generate_dataframe()

    def time_write_excel_style(self, engine):
        """Apply three cell-wise CSS rules and write the result to memory."""
        buffer = BytesIO()
        buffer.seek(0)
        excel_writer = ExcelWriter(buffer, engine=engine)

        styler = self.df.style
        # Two rules for every cell, then one restricted to a single column.
        # NOTE(review): assumes _generate_dataframe() yields a "float1" column.
        styler.applymap(lambda _: "border: red 1px solid;")
        styler.applymap(lambda _: "color: blue")
        styler.applymap(lambda _: "border-color: green black", subset=["float1"])

        styler.to_excel(excel_writer, sheet_name="Sheet1")
        excel_writer.save()
67+
68+
5069
class ReadExcel:
5170

5271
params = ["xlrd", "openpyxl", "odf"]

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,7 @@ Performance improvements
752752
- Performance improvement in :func:`factorize` (:issue:`46109`)
753753
- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
754754
- Performance improvement in :func:`read_excel` when ``nrows`` argument provided (:issue:`32727`)
755+
- Performance improvement in :meth:`.Styler.to_excel` when applying repeated CSS formats (:issue:`47371`)
755756
- Performance improvement in :meth:`MultiIndex.is_monotonic_increasing` (:issue:`47458`)
756757

757758
.. ---------------------------------------------------------------------------

pandas/io/formats/css.py

+31-24
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import (
88
Callable,
99
Generator,
10+
Iterable,
1011
Iterator,
1112
)
1213
import warnings
@@ -188,18 +189,35 @@ class CSSResolver:
188189

189190
SIDES = ("top", "right", "bottom", "left")
190191

192+
# Lookup table from a CSS shorthand property name to the callable that
# expands it. ``atomize`` consults this mapping and invokes the entry as
# ``expand(self, prop, value)``; properties not listed here pass through.
CSS_EXPANSIONS = {
    # "border" itself plus "border-top", "border-right", "border-bottom",
    # "border-left".
    **{
        (f"border-{side}" if side else "border"): _border_expander(side)
        for side in ("", "top", "right", "bottom", "left")
    },
    # "border-color", "border-style", "border-width". The template keeps its
    # "{:s}" placeholder; presumably ``_side_expander`` fills it per side
    # (defined outside this view).
    **{
        f"border-{aspect}": _side_expander("border-{:s}-" + aspect)
        for aspect in ("color", "style", "width")
    },
    "margin": _side_expander("margin-{:s}"),
    "padding": _side_expander("padding-{:s}"),
}
206+
191207
def __call__(
192208
self,
193-
declarations_str: str,
209+
declarations: str | Iterable[tuple[str, str]],
194210
inherited: dict[str, str] | None = None,
195211
) -> dict[str, str]:
196212
"""
197213
Resolve the given declarations to atomic properties.
198214
199215
Parameters
200216
----------
201-
declarations_str : str
202-
A list of CSS declarations
217+
declarations : str | Iterable[tuple[str, str]]
218+
A CSS string or set of CSS declaration tuples
219+
e.g. "font-weight: bold; background: blue" or
220+
{("font-weight", "bold"), ("background", "blue")}
203221
inherited : dict, optional
204222
Atomic properties indicating the inherited style context in which
205223
``declarations`` is to be resolved. ``inherited`` should already
@@ -230,7 +248,9 @@ def __call__(
230248
('font-size', '24pt'),
231249
('font-weight', 'bold')]
232250
"""
233-
props = dict(self.atomize(self.parse(declarations_str)))
251+
if isinstance(declarations, str):
252+
declarations = self.parse(declarations)
253+
props = dict(self.atomize(declarations))
234254
if inherited is None:
235255
inherited = {}
236256

@@ -347,28 +367,15 @@ def _error():
347367
size_fmt = f"{val:f}pt"
348368
return size_fmt
349369

350-
def atomize(self, declarations) -> Generator[tuple[str, str], None, None]:
370+
def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]:
351371
for prop, value in declarations:
352-
attr = "expand_" + prop.replace("-", "_")
353-
try:
354-
expand = getattr(self, attr)
355-
except AttributeError:
356-
yield prop, value
372+
prop = prop.lower()
373+
value = value.lower()
374+
if prop in self.CSS_EXPANSIONS:
375+
expand = self.CSS_EXPANSIONS[prop]
376+
yield from expand(self, prop, value)
357377
else:
358-
for prop, value in expand(prop, value):
359-
yield prop, value
360-
361-
expand_border = _border_expander()
362-
expand_border_top = _border_expander("top")
363-
expand_border_right = _border_expander("right")
364-
expand_border_bottom = _border_expander("bottom")
365-
expand_border_left = _border_expander("left")
366-
367-
expand_border_color = _side_expander("border-{:s}-color")
368-
expand_border_style = _side_expander("border-{:s}-style")
369-
expand_border_width = _side_expander("border-{:s}-width")
370-
expand_margin = _side_expander("margin-{:s}")
371-
expand_padding = _side_expander("padding-{:s}")
378+
yield prop, value
372379

373380
def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
374381
"""

pandas/io/formats/excel.py

+20-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
"""
44
from __future__ import annotations
55

6-
from functools import reduce
6+
from functools import (
7+
lru_cache,
8+
reduce,
9+
)
710
import itertools
811
import re
912
from typing import (
@@ -85,10 +88,13 @@ def __init__(
8588
**kwargs,
8689
) -> None:
8790
if css_styles and css_converter:
88-
css = ";".join(
89-
[a + ":" + str(v) for (a, v) in css_styles[css_row, css_col]]
90-
)
91-
style = css_converter(css)
91+
# Use dict to get only one (case-insensitive) declaration per property
92+
declaration_dict = {
93+
prop.lower(): val for prop, val in css_styles[css_row, css_col]
94+
}
95+
# Convert to frozenset for order-invariant caching
96+
unique_declarations = frozenset(declaration_dict.items())
97+
style = css_converter(unique_declarations)
9298

9399
return super().__init__(row=row, col=col, val=val, style=style, **kwargs)
94100

@@ -166,24 +172,27 @@ def __init__(self, inherited: str | None = None) -> None:
166172

167173
compute_css = CSSResolver()
168174

169-
def __call__(self, declarations_str: str) -> dict[str, dict[str, str]]:
175+
@lru_cache(maxsize=None)
176+
def __call__(
177+
self, declarations: str | frozenset[tuple[str, str]]
178+
) -> dict[str, dict[str, str]]:
170179
"""
171180
Convert CSS declarations to ExcelWriter style.
172181
173182
Parameters
174183
----------
175-
declarations_str : str
176-
List of CSS declarations.
177-
e.g. "font-weight: bold; background: blue"
184+
declarations : str | frozenset[tuple[str, str]]
185+
CSS string or set of CSS declaration tuples.
186+
e.g. "font-weight: bold; background: blue" or
187+
{("font-weight", "bold"), ("background", "blue")}
178188
179189
Returns
180190
-------
181191
xlstyle : dict
182192
A style as interpreted by ExcelWriter when found in
183193
ExcelCell.style.
184194
"""
185-
# TODO: memoize?
186-
properties = self.compute_css(declarations_str, self.inherited)
195+
properties = self.compute_css(declarations, self.inherited)
187196
return self.build_xlstyle(properties)
188197

189198
def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]:

pandas/tests/io/formats/test_to_excel.py

+90-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
import pandas._testing as tm
1212

1313
from pandas.io.formats.css import CSSWarning
14-
from pandas.io.formats.excel import CSSToExcelConverter
14+
from pandas.io.formats.excel import (
15+
CssExcelCell,
16+
CSSToExcelConverter,
17+
)
1518

1619

1720
@pytest.mark.parametrize(
@@ -340,3 +343,89 @@ def test_css_named_colors_from_mpl_present():
340343
pd_colors = CSSToExcelConverter.NAMED_COLORS
341344
for name, color in mpl_colors.items():
342345
assert name in pd_colors and pd_colors[name] == color[1:]
346+
347+
348+
@pytest.mark.parametrize(
    "styles,expected",
    [
        ([("color", "green"), ("color", "red")], "color: red;"),
        ([("font-weight", "bold"), ("font-weight", "normal")], "font-weight: normal;"),
        ([("text-align", "center"), ("TEXT-ALIGN", "right")], "text-align: right;"),
    ],
)
def test_css_excel_cell_precedence(styles, expected):
    """Later declarations of a property win over earlier ones."""
    # See GH 47371
    converter = CSSToExcelConverter()
    # Start from an empty cache so earlier tests cannot influence the result.
    converter.__call__.cache_clear()

    cell_kwargs = {
        "row": 0,
        "col": 0,
        "val": "",
        "style": None,
        "css_styles": {(0, 0): styles},
        "css_row": 0,
        "css_col": 0,
        "css_converter": converter,
    }
    cell = CssExcelCell(**cell_kwargs)

    # Clear again so the reference conversion below is computed afresh.
    converter.__call__.cache_clear()

    assert cell.style == converter(expected)
375+
376+
377+
@pytest.mark.parametrize(
    "styles,cache_hits,cache_misses",
    [
        ([[("color", "green"), ("color", "red"), ("color", "green")]], 0, 1),
        (
            [
                [("font-weight", "bold")],
                [("font-weight", "normal"), ("font-weight", "bold")],
            ],
            1,
            1,
        ),
        ([[("text-align", "center")], [("TEXT-ALIGN", "center")]], 1, 1),
        (
            [
                [("font-weight", "bold"), ("text-align", "center")],
                [("font-weight", "bold"), ("text-align", "left")],
            ],
            0,
            2,
        ),
        (
            [
                [("font-weight", "bold"), ("text-align", "center")],
                [("font-weight", "bold"), ("text-align", "left")],
                [("font-weight", "bold"), ("text-align", "center")],
            ],
            1,
            2,
        ),
    ],
)
def test_css_excel_cell_cache(styles, cache_hits, cache_misses):
    """Equivalent declaration sets hit the converter cache; distinct ones miss."""
    # See GH 47371
    converter = CSSToExcelConverter()
    # Reset the counters before building any cells.
    converter.__call__.cache_clear()

    # One cell per parametrized declaration list, keyed by (row 0, column i).
    css_styles = {(0, col): declarations for col, declarations in enumerate(styles)}
    for key in css_styles:
        css_row, css_col = key
        CssExcelCell(
            row=0,
            col=0,
            val="",
            style=None,
            css_styles=css_styles,
            css_row=css_row,
            css_col=css_col,
            css_converter=converter,
        )

    # Snapshot the statistics before clearing, then leave the cache empty.
    stats = converter.__call__.cache_info()
    converter.__call__.cache_clear()

    assert stats.hits == cache_hits
    assert stats.misses == cache_misses

0 commit comments

Comments
 (0)