pandas-dev · marknsikora · Sep 29, 2022 · Sep 29, 2022 · Sep 29, 2022 · Sep 29, 2022
@@ -49,6 +49,7 @@ dependencies:
   - scipy=1.7.1
   - sqlalchemy=1.4.16
   - tabulate=0.8.9
+  - tinycss2=1.0.2
   - tzdata=2022a
   - xarray=0.19.0
   - xlrd=2.0.1

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -296,6 +296,7 @@ Dependency                Minimum Version    Notes
 matplotlib                3.3.2              Plotting library
 Jinja2                    3.0.0              Conditional formatting with DataFrame.style
 tabulate                  0.8.9              Printing in Markdown-friendly format (see `tabulate`_)
+tinycss2                  1.0.2              Style formatting with DataFrame.style
 ========================= ================== =============================================================
 
 Computation

diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst
@@ -35,6 +35,7 @@ Other enhancements
 - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
 - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
 - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
+- Replaced the custom CSS parser with the ``tinycss2`` library (:issue:`48868`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/environment.yml b/environment.yml
@@ -49,6 +49,7 @@ dependencies:
   - scipy
   - sqlalchemy
   - tabulate
+  - tinycss2
   - tzdata>=2022a
   - xarray
   - xlrd

diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
@@ -42,6 +42,7 @@
     "sqlalchemy": "1.4.16",
     "tables": "3.6.1",
     "tabulate": "0.8.9",
+    "tinycss2": "1.0.2",
     "xarray": "0.19.0",
     "xlrd": "2.0.1",
     "xlwt": "1.3.0",

diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py
@@ -13,9 +13,12 @@
 )
 import warnings
 
+from pandas.compat._optional import import_optional_dependency
 from pandas.errors import CSSWarning
 from pandas.util._exceptions import find_stack_level
 
+tinycss2 = import_optional_dependency("tinycss2")
+
 
 def _side_expander(prop_fmt: str) -> Callable:
     """
@@ -377,8 +380,20 @@ def _error():
 
     def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]:
         for prop, value in declarations:
-            prop = prop.lower()
-            value = value.lower()
+            # Need to reparse the value here in case the prop was passed
+            # through as a dict from the styler rather than through this
+            # class's parse method
+            tokens = []
+            for token in tinycss2.parse_component_value_list(value):
+                if token.type == "ident":
+                    # The old css parser normalized all identifier values, do
+                    # so here to keep backwards compatibility
+                    token.value = token.lower_value
+
+                tokens.append(token)
+
+            value = tinycss2.serialize(tokens).strip()
+
             if prop in self.CSS_EXPANSIONS:
                 expand = self.CSS_EXPANSIONS[prop]
                 yield from expand(self, prop, value)
@@ -395,18 +410,29 @@ def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
         ----------
         declarations_str : str
         """
-        for decl in declarations_str.split(";"):
-            if not decl.strip():
-                continue
-            prop, sep, val = decl.partition(":")
-            prop = prop.strip().lower()
-            # TODO: don't lowercase case sensitive parts of values (strings)
-            val = val.strip().lower()
-            if sep:
-                yield prop, val
-            else:
+        declarations = tinycss2.parse_declaration_list(
+            declarations_str, skip_comments=True, skip_whitespace=True
+        )
+
+        for decl in declarations:
+            if decl.type == "error":
                 warnings.warn(
-                    f"Ill-formatted attribute: expected a colon in {repr(decl)}",
+                    decl.message,
                     CSSWarning,
                     stacklevel=find_stack_level(inspect.currentframe()),
                 )
+            else:
+                tokens = []
+                for token in decl.value:
+                    if token.type == "ident":
+                        # The old css parser normalized all identifier values,
+                        # do so here to keep backwards compatibility
+                        token.value = token.lower_value
+
+                    tokens.append(token)
+
+                value = tinycss2.serialize(tokens).strip()
+                if decl.important:
+                    value = f"{value} !important"
+
+                yield decl.lower_name, value
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -28,6 +28,7 @@
     IndexLabel,
     StorageOptions,
 )
+from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 
@@ -54,6 +55,8 @@
 from pandas.io.formats.format import get_level_lengths
 from pandas.io.formats.printing import pprint_thing
 
+tinycss2 = import_optional_dependency("tinycss2")
+
 
 class ExcelCell:
     __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend")
@@ -328,7 +331,24 @@ def build_fill(self, props: Mapping[str, str]):
 
     def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]:
         fc = props.get("number-format")
+
+        # Old workaround for getting a literal ';'
         fc = fc.replace("§", ";") if isinstance(fc, str) else fc
+
+        # If a quoted string is passed as the value to number format, get the
+        # real value of the string. This will allow passing all the characters
+        # that would otherwise break css if unquoted.
+        if isinstance(fc, str):
+            tokens = tinycss2.parse_component_value_list(fc)
+
+            try:
+                (token,) = tokens
+            except ValueError:
+                pass
+            else:
+                if token.type == "string":
+                    fc = token.value
+
         return {"format_code": fc}
 
     def build_font(

@@ -879,10 +879,10 @@ def to_latex(
         .. code-block:: python
 
             set_table_styles([
-                {"selector": "column_format", "props": f":{column_format};"},
-                {"selector": "position", "props": f":{position};"},
-                {"selector": "position_float", "props": f":{position_float};"},
-                {"selector": "label", "props": f":{{{label.replace(':','§')}}};"}
+                {"selector": "column_format", "props": f"value:{column_format};"},
+                {"selector": "position", "props": f"value:{position};"},
+                {"selector": "position_float", "props": f"value:{position_float};"},
+                {"selector": "label", "props": f"value:{{{label.replace(':','§')}}};"}
             ], overwrite=False)
 
         Exception is made for the ``hrules`` argument which, in fact, controls all three
@@ -895,8 +895,8 @@ def to_latex(
         .. code-block:: python
 
             set_table_styles([
-                {'selector': 'toprule', 'props': ':toprule;'},
-                {'selector': 'bottomrule', 'props': ':hline;'},
+                {'selector': 'toprule', 'props': 'value:toprule;'},
+                {'selector': 'bottomrule', 'props': 'value:hline;'},
             ], overwrite=False)
 
         If other ``commands`` are added to table styles they will be detected, and
@@ -907,7 +907,7 @@ def to_latex(
         .. code-block:: python
 
             set_table_styles([
-                {'selector': 'rowcolors', 'props': ':{1}{pink}{red};'}
+                {'selector': 'rowcolors', 'props': 'value:{1}{pink}{red};'}
             ], overwrite=False)
 
         A more comprehensive example using these arguments is as follows:
@@ -1128,7 +1128,7 @@ def to_latex(
         if column_format is not None:
             # add more recent setting to table_styles
             obj.set_table_styles(
-                [{"selector": "column_format", "props": f":{column_format}"}],
+                [{"selector": "column_format", "props": f"value:{column_format}"}],
                 overwrite=False,
             )
         elif "column_format" in table_selectors:
@@ -1148,13 +1148,13 @@ def to_latex(
                         ("r" if not siunitx else "S") if ci in numeric_cols else "l"
                     )
             obj.set_table_styles(
-                [{"selector": "column_format", "props": f":{column_format}"}],
+                [{"selector": "column_format", "props": f"value:{column_format}"}],
                 overwrite=False,
             )
 
         if position:
             obj.set_table_styles(
-                [{"selector": "position", "props": f":{position}"}],
+                [{"selector": "position", "props": f"value:{position}"}],
                 overwrite=False,
             )
 
@@ -1170,24 +1170,29 @@ def to_latex(
                     f"got: '{position_float}'"
                 )
             obj.set_table_styles(
-                [{"selector": "position_float", "props": f":{position_float}"}],
+                [{"selector": "position_float", "props": f"value:{position_float}"}],
                 overwrite=False,
             )
 
         hrules = get_option("styler.latex.hrules") if hrules is None else hrules
         if hrules:
             obj.set_table_styles(
                 [
-                    {"selector": "toprule", "props": ":toprule"},
-                    {"selector": "midrule", "props": ":midrule"},
-                    {"selector": "bottomrule", "props": ":bottomrule"},
+                    {"selector": "toprule", "props": "value:toprule"},
+                    {"selector": "midrule", "props": "value:midrule"},
+                    {"selector": "bottomrule", "props": "value:bottomrule"},
                 ],
                 overwrite=False,
             )
 
         if label:
             obj.set_table_styles(
-                [{"selector": "label", "props": f":{{{label.replace(':', '§')}}}"}],
+                [
+                    {
+                        "selector": "label",
+                        "props": f"value:{{{label.replace(':', '§')}}}",
+                    }
+                ],
                 overwrite=False,
             )
 

@@ -49,6 +49,8 @@
 jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
 from markupsafe import escape as escape_html  # markupsafe is jinja2 dependency
 
+tinycss2 = import_optional_dependency("tinycss2")
+
 BaseFormatter = Union[str, Callable]
 ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]]
 CSSPair = Tuple[str, Union[str, float]]
@@ -1106,10 +1108,11 @@ def format(
         method to create `to_excel` permissible formatting. Note that semi-colons are
         CSS protected characters but used as separators in Excel's format string.
         Replace semi-colons with the section separator character (ASCII-245) when
-        defining the formatting here.
+        defining the formatting here, or wrap the entire string in quotes to
+        have the value passed directly through to the writer.
 
         >>> df = pd.DataFrame({"A": [1, 0, -1]})
-        >>> pseudo_css = "number-format: 0§[Red](0)§-§@;"
+        >>> pseudo_css = "number-format: '0;[Red](0);-;@';"
         >>> df.style.applymap(lambda v: css).to_excel("formatted_file.xlsx")
         ...  # doctest: +SKIP
 
@@ -1853,18 +1856,26 @@ def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:
                                              ('border','1px solid red')]
     """
     if isinstance(style, str):
-        s = style.split(";")
-        try:
-            return [
-                (x.split(":")[0].strip(), x.split(":")[1].strip())
-                for x in s
-                if x.strip() != ""
-            ]
-        except IndexError:
-            raise ValueError(
-                "Styles supplied as string must follow CSS rule formats, "
-                f"for example 'attr: val;'. '{style}' was given."
-            )
+        declarations = tinycss2.parse_declaration_list(
+            style, skip_comments=True, skip_whitespace=True
+        )
+
+        parsed_styles = []
+        for decl in declarations:
+            if decl.type == "error":
+                raise ValueError(
+                    "Styles supplied as string must follow CSS rule formats, "
+                    f"for example 'attr: val;'. '{style}' was given. "
+                    f"{decl.message}"
+                )
+
+            value = tinycss2.serialize(decl.value).strip()
+            if decl.important:
+                value = f"{value} !important"
+
+            parsed_styles.append((decl.name, value))
+
+        return parsed_styles
     return style
 
 

diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
@@ -377,6 +377,7 @@ def test_constructor_expanddim(self):
             df._constructor_expanddim(np.arange(27).reshape(3, 3, 3))
 
     @skip_if_no("jinja2")
+    @skip_if_no("tinycss2")
     def test_inspect_getmembers(self):
         # GH38740
         df = DataFrame()

diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
@@ -15,6 +15,7 @@
 )
 
 openpyxl = pytest.importorskip("openpyxl")
+pytest.importorskip("tinycss2")
 
 pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
 

diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py
@@ -12,6 +12,7 @@
 )
 import pandas._testing as tm
 
+pytest.importorskip("tinycss2")
 from pandas.io.excel import ExcelWriter
 from pandas.io.formats.excel import ExcelFormatter
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,6 +15,7 @@ @@
     )
     openpyxl = pytest.importorskip("openpyxl")
+    pytest.importorskip("tinycss2")
     pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
@@ Expand Down @@