Skip to content

Commit 7454dc5

Browse files
committed
Replace custom css parsing code with tinycss2
1 parent b9b8cad commit 7454dc5

File tree

8 files changed

+93
-28
lines changed

8 files changed

+93
-28
lines changed

ci/deps/actions-38-minimum_versions.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ dependencies:
4949
- scipy=1.7.1
5050
- sqlalchemy=1.4.16
5151
- tabulate=0.8.9
52+
- tinycss2=1.0.0
5253
- tzdata=2022a
5354
- xarray=0.19.0
5455
- xlrd=2.0.1

doc/source/getting_started/install.rst

+1
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ Dependency Minimum Version Notes
296296
matplotlib 3.3.2 Plotting library
297297
Jinja2 3.0.0 Conditional formatting with DataFrame.style
298298
tabulate 0.8.9 Printing in Markdown-friendly format (see `tabulate`_)
299+
tinycss2 1.0.0 Style formatting with DataFrame.style
299300
========================= ================== =============================================================
300301

301302
Computation

doc/source/whatsnew/v1.6.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ enhancement2
2929
Other enhancements
3030
^^^^^^^^^^^^^^^^^^
3131
- :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`)
32+
- Replaced the custom CSS parser used by ``DataFrame.style`` with the ``tinycss2`` library (:issue:`48868`)
3233
-
3334

3435
.. ---------------------------------------------------------------------------

pandas/compat/_optional.py

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
"sqlalchemy": "1.4.16",
4343
"tables": "3.6.1",
4444
"tabulate": "0.8.9",
45+
"tinycss2": "1.0.0",
4546
"xarray": "0.19.0",
4647
"xlrd": "2.0.1",
4748
"xlwt": "1.3.0",

pandas/io/formats/css.py

+39-13
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@
1313
)
1414
import warnings
1515

16+
from pandas.compat._optional import import_optional_dependency
1617
from pandas.errors import CSSWarning
1718
from pandas.util._exceptions import find_stack_level
1819

20+
tinycss2 = import_optional_dependency("tinycss2")
21+
1922

2023
def _side_expander(prop_fmt: str) -> Callable:
2124
"""
@@ -377,8 +380,20 @@ def _error():
377380

378381
def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]:
379382
for prop, value in declarations:
380-
prop = prop.lower()
381-
value = value.lower()
383+
# Need to reparse the value here in case the prop was passed
384+
# through as a dict from the styler rather than through this
385+
# class's parse method
386+
tokens = []
387+
for token in tinycss2.parse_component_value_list(value):
388+
if token.type == "ident":
389+
# The old css parser normalized all identifier values, do
390+
# so here to keep backwards compatibility
391+
token.value = token.lower_value
392+
393+
tokens.append(token)
394+
395+
value = tinycss2.serialize(tokens).strip()
396+
382397
if prop in self.CSS_EXPANSIONS:
383398
expand = self.CSS_EXPANSIONS[prop]
384399
yield from expand(self, prop, value)
@@ -395,18 +410,29 @@ def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
395410
----------
396411
declarations_str : str
397412
"""
398-
for decl in declarations_str.split(";"):
399-
if not decl.strip():
400-
continue
401-
prop, sep, val = decl.partition(":")
402-
prop = prop.strip().lower()
403-
# TODO: don't lowercase case sensitive parts of values (strings)
404-
val = val.strip().lower()
405-
if sep:
406-
yield prop, val
407-
else:
413+
declarations = tinycss2.parse_declaration_list(
414+
declarations_str, skip_comments=True, skip_whitespace=True
415+
)
416+
417+
for decl in declarations:
418+
if decl.type == "error":
408419
warnings.warn(
409-
f"Ill-formatted attribute: expected a colon in {repr(decl)}",
420+
decl.message,
410421
CSSWarning,
411422
stacklevel=find_stack_level(inspect.currentframe()),
412423
)
424+
else:
425+
tokens = []
426+
for token in decl.value:
427+
if token.type == "ident":
428+
# The old css parser normalized all identifier values,
429+
# do so here to keep backwards compatibility
430+
token.value = token.lower_value
431+
432+
tokens.append(token)
433+
434+
value = tinycss2.serialize(tokens).strip()
435+
if decl.important:
436+
value = f"{value} !important"
437+
438+
yield decl.lower_name, value

pandas/io/formats/excel.py

+20
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
IndexLabel,
2929
StorageOptions,
3030
)
31+
from pandas.compat._optional import import_optional_dependency
3132
from pandas.util._decorators import doc
3233
from pandas.util._exceptions import find_stack_level
3334

@@ -54,6 +55,8 @@
5455
from pandas.io.formats.format import get_level_lengths
5556
from pandas.io.formats.printing import pprint_thing
5657

58+
tinycss2 = import_optional_dependency("tinycss2")
59+
5760

5861
class ExcelCell:
5962
__fields__ = ("row", "col", "val", "style", "mergestart", "mergeend")
@@ -328,7 +331,24 @@ def build_fill(self, props: Mapping[str, str]):
328331

329332
def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]:
330333
fc = props.get("number-format")
334+
335+
# Old work around for getting a literal ';'
331336
fc = fc.replace("§", ";") if isinstance(fc, str) else fc
337+
338+
# If a quoted string is passed as the value to number format, get the
339+
# real value of the string. This will allow passing all the characters
340+
# that would otherwise break css if unquoted.
341+
if isinstance(fc, str):
342+
tokens = tinycss2.parse_component_value_list(fc)
343+
344+
try:
345+
(token,) = tokens
346+
except ValueError:
347+
pass
348+
else:
349+
if token.type == "string":
350+
fc = token.value
351+
332352
return {"format_code": fc}
333353

334354
def build_font(

pandas/io/formats/style_render.py

+25-14
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
5050
from markupsafe import escape as escape_html # markupsafe is jinja2 dependency
5151

52+
tinycss2 = import_optional_dependency("tinycss2")
53+
5254
BaseFormatter = Union[str, Callable]
5355
ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]]
5456
CSSPair = Tuple[str, Union[str, float]]
@@ -1106,10 +1108,11 @@ def format(
11061108
method to create `to_excel` permissible formatting. Note that semi-colons are
11071109
CSS protected characters but used as separators in Excel's format string.
11081110
Replace semi-colons with the section sign character (``§``, U+00A7) when
1109-
defining the formatting here.
1111+
defining the formatting here, or wrap the entire string in quotes to
1112+
have the value passed directly through to the writer.
11101113
11111114
>>> df = pd.DataFrame({"A": [1, 0, -1]})
1112-
>>> pseudo_css = "number-format: [Red](0)§-§@;"
1115+
>>> pseudo_css = "number-format: '0;[Red](0);-;@';"
11131116
>>> df.style.applymap(lambda v: pseudo_css).to_excel("formatted_file.xlsx")
11141117
... # doctest: +SKIP
11151118
@@ -1853,18 +1856,26 @@ def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:
18531856
('border','1px solid red')]
18541857
"""
18551858
if isinstance(style, str):
1856-
s = style.split(";")
1857-
try:
1858-
return [
1859-
(x.split(":")[0].strip(), x.split(":")[1].strip())
1860-
for x in s
1861-
if x.strip() != ""
1862-
]
1863-
except IndexError:
1864-
raise ValueError(
1865-
"Styles supplied as string must follow CSS rule formats, "
1866-
f"for example 'attr: val;'. '{style}' was given."
1867-
)
1859+
declarations = tinycss2.parse_declaration_list(
1860+
style, skip_comments=True, skip_whitespace=True
1861+
)
1862+
1863+
parsed_styles = []
1864+
for decl in declarations:
1865+
if decl.type == "error":
1866+
raise ValueError(
1867+
"Styles supplied as string must follow CSS rule formats, "
1868+
f"for example 'attr: val;'. '{style}' was given. "
1869+
f"{decl.message}"
1870+
)
1871+
1872+
value = tinycss2.serialize(decl.value).strip()
1873+
if decl.important:
1874+
value = f"{value} !important"
1875+
1876+
parsed_styles.append((decl.name, value))
1877+
1878+
return parsed_styles
18681879
return style
18691880

18701881

pandas/tests/io/formats/test_to_excel.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,11 @@
215215
("number-format: 0%", {"number_format": {"format_code": "0%"}}),
216216
(
217217
"number-format: 0§[Red](0)§-§@;",
218-
{"number_format": {"format_code": "0;[red](0);-;@"}}, # GH 46152
218+
{"number_format": {"format_code": "0;[Red](0);-;@"}}, # GH 46152
219+
),
220+
(
221+
"number-format: '#,##0_);[Red](#,##0)';",
222+
{"number_format": {"format_code": "#,##0_);[Red](#,##0)"}},
219223
),
220224
],
221225
)

0 commit comments

Comments
 (0)