Skip to content

ENH: Use a proper CSS parser for pandas Styler objects #48869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/deps/actions-38-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ dependencies:
- scipy=1.7.1
- sqlalchemy=1.4.16
- tabulate=0.8.9
- tinycss2=1.0.2
- tzdata=2022a
- xarray=0.19.0
- xlrd=2.0.1
Expand Down
1 change: 1 addition & 0 deletions doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ Dependency Minimum Version Notes
matplotlib 3.3.2 Plotting library
Jinja2 3.0.0 Conditional formatting with DataFrame.style
tabulate 0.8.9 Printing in Markdown-friendly format (see `tabulate`_)
tinycss2 1.0.2 Style formatting with DataFrame.style
========================= ================== =============================================================

Computation
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.6.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Other enhancements
- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
- Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
- :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
- Replaced the custom CSS parser with the ``tinycss2`` library (:issue:`48868`)
-

.. ---------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ dependencies:
- scipy
- sqlalchemy
- tabulate
- tinycss2
- tzdata>=2022a
- xarray
- xlrd
Expand Down
1 change: 1 addition & 0 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
"sqlalchemy": "1.4.16",
"tables": "3.6.1",
"tabulate": "0.8.9",
"tinycss2": "1.0.2",
"xarray": "0.19.0",
"xlrd": "2.0.1",
"xlwt": "1.3.0",
Expand Down
52 changes: 39 additions & 13 deletions pandas/io/formats/css.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@
)
import warnings

from pandas.compat._optional import import_optional_dependency
from pandas.errors import CSSWarning
from pandas.util._exceptions import find_stack_level

tinycss2 = import_optional_dependency("tinycss2")


def _side_expander(prop_fmt: str) -> Callable:
"""
Expand Down Expand Up @@ -377,8 +380,20 @@ def _error():

def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]:
for prop, value in declarations:
prop = prop.lower()
value = value.lower()
# Need to reparse the value here in case the prop was passed
# through as a dict from the styler rather than through this
# class's ``parse`` method
tokens = []
for token in tinycss2.parse_component_value_list(value):
if token.type == "ident":
# The old CSS parser normalized all identifier values; do
# so here to keep backwards compatibility
token.value = token.lower_value
Comment on lines +388 to +391
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the related discussion here #48660 (comment) regarding case normalization. It seems worth investigating further whether this is truly necessary for backwards compatibility.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sure; I looked into this back in the 0.24 code, and I believe the reason was to allow users to set a font like Arial and have it work everywhere. The HTML output expects the upper-case version, but the Excel library needs an all-lower-case version.


tokens.append(token)

value = tinycss2.serialize(tokens).strip()

if prop in self.CSS_EXPANSIONS:
expand = self.CSS_EXPANSIONS[prop]
yield from expand(self, prop, value)
Expand All @@ -395,18 +410,29 @@ def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
----------
declarations_str : str
"""
for decl in declarations_str.split(";"):
if not decl.strip():
continue
prop, sep, val = decl.partition(":")
prop = prop.strip().lower()
# TODO: don't lowercase case sensitive parts of values (strings)
val = val.strip().lower()
if sep:
yield prop, val
else:
declarations = tinycss2.parse_declaration_list(
declarations_str, skip_comments=True, skip_whitespace=True
)

for decl in declarations:
if decl.type == "error":
warnings.warn(
f"Ill-formatted attribute: expected a colon in {repr(decl)}",
decl.message,
CSSWarning,
stacklevel=find_stack_level(inspect.currentframe()),
)
else:
tokens = []
for token in decl.value:
if token.type == "ident":
# The old CSS parser normalized all identifier values;
# do so here to keep backwards compatibility
token.value = token.lower_value

tokens.append(token)

value = tinycss2.serialize(tokens).strip()
if decl.important:
value = f"{value} !important"

yield decl.lower_name, value
20 changes: 20 additions & 0 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
IndexLabel,
StorageOptions,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level

Expand All @@ -54,6 +55,8 @@
from pandas.io.formats.format import get_level_lengths
from pandas.io.formats.printing import pprint_thing

tinycss2 = import_optional_dependency("tinycss2")


class ExcelCell:
__fields__ = ("row", "col", "val", "style", "mergestart", "mergeend")
Expand Down Expand Up @@ -328,7 +331,24 @@ def build_fill(self, props: Mapping[str, str]):

def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]:
fc = props.get("number-format")

# Old workaround for getting a literal ';'
fc = fc.replace("§", ";") if isinstance(fc, str) else fc

# If a quoted string is passed as the value to number format, get the
# real value of the string. This will allow passing all the characters
# that would otherwise break css if unquoted.
if isinstance(fc, str):
tokens = tinycss2.parse_component_value_list(fc)

try:
(token,) = tokens
except ValueError:
pass
else:
if token.type == "string":
fc = token.value

return {"format_code": fc}

def build_font(
Expand Down
35 changes: 20 additions & 15 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,10 +879,10 @@ def to_latex(
.. code-block:: python

set_table_styles([
{"selector": "column_format", "props": f":{column_format};"},
{"selector": "position", "props": f":{position};"},
{"selector": "position_float", "props": f":{position_float};"},
{"selector": "label", "props": f":{{{label.replace(':','§')}}};"}
{"selector": "column_format", "props": f"value:{column_format};"},
{"selector": "position", "props": f"value:{position};"},
{"selector": "position_float", "props": f"value:{position_float};"},
{"selector": "label", "props": f"value:{{{label.replace(':','§')}}};"}
], overwrite=False)

Exception is made for the ``hrules`` argument which, in fact, controls all three
Expand All @@ -895,8 +895,8 @@ def to_latex(
.. code-block:: python

set_table_styles([
{'selector': 'toprule', 'props': ':toprule;'},
{'selector': 'bottomrule', 'props': ':hline;'},
{'selector': 'toprule', 'props': 'value:toprule;'},
{'selector': 'bottomrule', 'props': 'value:hline;'},
], overwrite=False)

If other ``commands`` are added to table styles they will be detected, and
Expand All @@ -907,7 +907,7 @@ def to_latex(
.. code-block:: python

set_table_styles([
{'selector': 'rowcolors', 'props': ':{1}{pink}{red};'}
{'selector': 'rowcolors', 'props': 'value:{1}{pink}{red};'}
], overwrite=False)

A more comprehensive example using these arguments is as follows:
Expand Down Expand Up @@ -1128,7 +1128,7 @@ def to_latex(
if column_format is not None:
# add more recent setting to table_styles
obj.set_table_styles(
[{"selector": "column_format", "props": f":{column_format}"}],
[{"selector": "column_format", "props": f"value:{column_format}"}],
overwrite=False,
)
elif "column_format" in table_selectors:
Expand All @@ -1148,13 +1148,13 @@ def to_latex(
("r" if not siunitx else "S") if ci in numeric_cols else "l"
)
obj.set_table_styles(
[{"selector": "column_format", "props": f":{column_format}"}],
[{"selector": "column_format", "props": f"value:{column_format}"}],
overwrite=False,
)

if position:
obj.set_table_styles(
[{"selector": "position", "props": f":{position}"}],
[{"selector": "position", "props": f"value:{position}"}],
overwrite=False,
)

Expand All @@ -1170,24 +1170,29 @@ def to_latex(
f"got: '{position_float}'"
)
obj.set_table_styles(
[{"selector": "position_float", "props": f":{position_float}"}],
[{"selector": "position_float", "props": f"value:{position_float}"}],
overwrite=False,
)

hrules = get_option("styler.latex.hrules") if hrules is None else hrules
if hrules:
obj.set_table_styles(
[
{"selector": "toprule", "props": ":toprule"},
{"selector": "midrule", "props": ":midrule"},
{"selector": "bottomrule", "props": ":bottomrule"},
{"selector": "toprule", "props": "value:toprule"},
{"selector": "midrule", "props": "value:midrule"},
{"selector": "bottomrule", "props": "value:bottomrule"},
],
overwrite=False,
)

if label:
obj.set_table_styles(
[{"selector": "label", "props": f":{{{label.replace(':', '§')}}}"}],
[
{
"selector": "label",
"props": f"value:{{{label.replace(':', '§')}}}",
}
],
overwrite=False,
)

Expand Down
39 changes: 25 additions & 14 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
from markupsafe import escape as escape_html # markupsafe is jinja2 dependency

tinycss2 = import_optional_dependency("tinycss2")

BaseFormatter = Union[str, Callable]
ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]]
CSSPair = Tuple[str, Union[str, float]]
Expand Down Expand Up @@ -1106,10 +1108,11 @@ def format(
method to create `to_excel` permissible formatting. Note that semi-colons are
CSS protected characters but used as separators in Excel's format string.
Replace semi-colons with the section separator character (ASCII-245) when
defining the formatting here.
defining the formatting here, or wrap the entire string in quotes to
have the value passed directly through to the writer.

>>> df = pd.DataFrame({"A": [1, 0, -1]})
>>> pseudo_css = "number-format: [Red](0)§-§@;"
>>> pseudo_css = "number-format: '0;[Red](0);-;@';"
>>> df.style.applymap(lambda v: pseudo_css).to_excel("formatted_file.xlsx")
... # doctest: +SKIP

Expand Down Expand Up @@ -1853,18 +1856,26 @@ def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:
('border','1px solid red')]
"""
if isinstance(style, str):
s = style.split(";")
try:
return [
(x.split(":")[0].strip(), x.split(":")[1].strip())
for x in s
if x.strip() != ""
]
except IndexError:
raise ValueError(
"Styles supplied as string must follow CSS rule formats, "
f"for example 'attr: val;'. '{style}' was given."
)
declarations = tinycss2.parse_declaration_list(
style, skip_comments=True, skip_whitespace=True
)

parsed_styles = []
for decl in declarations:
if decl.type == "error":
raise ValueError(
"Styles supplied as string must follow CSS rule formats, "
f"for example 'attr: val;'. '{style}' was given. "
f"{decl.message}"
)

value = tinycss2.serialize(decl.value).strip()
if decl.important:
value = f"{value} !important"

parsed_styles.append((decl.name, value))

return parsed_styles
return style


Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ def test_constructor_expanddim(self):
df._constructor_expanddim(np.arange(27).reshape(3, 3, 3))

@skip_if_no("jinja2")
@skip_if_no("tinycss2")
def test_inspect_getmembers(self):
# GH38740
df = DataFrame()
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)

openpyxl = pytest.importorskip("openpyxl")
pytest.importorskip("tinycss2")

pytestmark = pytest.mark.parametrize("ext", [".xlsx"])

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/excel/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
import pandas._testing as tm

pytest.importorskip("tinycss2")
from pandas.io.excel import ExcelWriter
from pandas.io.formats.excel import ExcelFormatter

Expand Down
Loading