From 2a1287caab65523bfc6baf4716ac8fab56640611 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 13 Sep 2020 13:08:16 +0200 Subject: [PATCH 1/6] styler uuid control and security --- pandas/io/formats/style.py | 12 +++++++++--- pandas/tests/io/formats/test_style.py | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index b27a4e036e137..b44b44fe25b05 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -18,7 +18,7 @@ Tuple, Union, ) -from uuid import uuid1 +from uuid import uuid4 import numpy as np @@ -73,6 +73,9 @@ class Styler: List of {selector: (attr, value)} dicts; see Notes. uuid : str, default None A unique identifier to avoid CSS collisions; generated automatically. + uuid_len : int, default 5 + If ``uuid`` is not specified, the length of the ``uuid`` to randomly generate + in characters, max is 32. caption : str, default None Caption to attach to the table. table_attributes : str, default None @@ -128,6 +131,7 @@ class Styler: * Blank cells include ``blank`` * Data cells include ``data`` + """ loader = jinja2.PackageLoader("pandas", "io/formats/templates") @@ -140,6 +144,7 @@ def __init__( precision: Optional[int] = None, table_styles: Optional[List[Dict[str, List[Tuple[str, str]]]]] = None, uuid: Optional[str] = None, + uuid_len: int = 5, caption: Optional[str] = None, table_attributes: Optional[str] = None, cell_ids: bool = True, @@ -159,7 +164,8 @@ def __init__( self.index = data.index self.columns = data.columns - self.uuid = uuid + self.uuid_len = uuid_len if uuid_len < 33 else 32 + self.uuid = uuid or uuid4().hex[: self.uuid_len] self.table_styles = table_styles self.caption = caption if precision is None: @@ -246,7 +252,7 @@ def _translate(self): precision = self.precision hidden_index = self.hidden_index hidden_columns = self.hidden_columns - uuid = self.uuid or str(uuid1()).replace("-", "_") + uuid = self.uuid ROW_HEADING_CLASS = "row_heading" COL_HEADING_CLASS = "col_heading" INDEX_NAME_CLASS = "index_name" diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index de549ec3eb75e..2dea28a1bbe7a 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1697,6 +1697,24 @@ def test_colspan_w3(self): s = Styler(df, uuid="_", cell_ids=False) assert 'l0' in s.render() + @pytest.mark.parametrize("len_", [1, 5, 32]) + def test_uuid_len(self, len_): + # GH .. + df = pd.DataFrame(data=[["A"]]) + s = Styler(df, uuid_len=len_, cell_ids=False).render() + strt = s.find('id="T_') + end = s[strt + 6 :].find('"') + assert end == len_ + + @pytest.mark.parametrize("len_", [33, 100]) + def test_uuid_max_len(self, len_): + # GH .. + df = pd.DataFrame(data=[["A"]]) + s = Styler(df, uuid_len=len_, cell_ids=False).render() + strt = s.find('id="T_') + end = s[strt + 6 :].find('"') + assert end == 32 + @td.skip_if_no_mpl class TestStylerMatplotlibDep: From a8c8e6f532d10d3f727eabeb253bb584ca912b08 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 13 Sep 2020 22:07:01 +0200 Subject: [PATCH 2/6] uuid in Styler --- pandas/io/formats/style.py | 2 +- pandas/tests/io/formats/test_style.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index b44b44fe25b05..afd21bd315609 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -165,7 +165,7 @@ def __init__( self.columns = data.columns self.uuid_len = uuid_len if uuid_len < 33 else 32 - self.uuid = uuid or uuid4().hex[: self.uuid_len] + self.uuid = (uuid or uuid4().hex[: self.uuid_len]) + "_" self.table_styles = table_styles self.caption = caption if precision is None: diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 2dea28a1bbe7a..1c99e7ada37c3 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1699,21 +1699,21 @@ def test_colspan_w3(self): @pytest.mark.parametrize("len_", [1, 5, 32]) def test_uuid_len(self, len_): - # GH .. + # GH 36345 df = pd.DataFrame(data=[["A"]]) s = Styler(df, uuid_len=len_, cell_ids=False).render() strt = s.find('id="T_') end = s[strt + 6 :].find('"') - assert end == len_ + assert end == len_ + 1 @pytest.mark.parametrize("len_", [33, 100]) def test_uuid_max_len(self, len_): - # GH .. + # GH 36345 df = pd.DataFrame(data=[["A"]]) s = Styler(df, uuid_len=len_, cell_ids=False).render() strt = s.find('id="T_') end = s[strt + 6 :].find('"') - assert end == 32 + assert end == 32 + 1 @td.skip_if_no_mpl From 0e83a43ecdfd98ebebd4fb788d2f5d4ce6b86f01 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 13 Sep 2020 22:17:27 +0200 Subject: [PATCH 3/6] whats new --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e577a8f26bd12..829fd073489ed 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -207,7 +207,7 @@ Performance improvements - Performance improvements when creating Series with dtype `str` or :class:`StringDtype` from array with many string elements (:issue:`36304`) - Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`) -- +- ``Styler`` uuid method altered to compress data transmission over web whilst maintaining reasonably low table collision probability (:issue:`36345`) .. --------------------------------------------------------------------------- From 44aa5d22c9cae1b74dfa8db4663c0d3565e7b5ac Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 13 Sep 2020 22:35:53 +0200 Subject: [PATCH 4/6] typing fix --- pandas/io/formats/style.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 6a1d9eafbc189..2e5e5b686ae2b 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -131,7 +131,6 @@ class Styler: * Blank cells include ``blank`` * Data cells include ``data`` - """ loader = jinja2.PackageLoader("pandas", "io/formats/templates") From 2911d06a68223c710ac94dc5f6f14d0113312856 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 14 Sep 2020 07:48:43 +0200 Subject: [PATCH 5/6] jreback requests --- pandas/io/formats/style.py | 9 +++++++-- pandas/tests/io/formats/test_style.py | 18 ++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 2e5e5b686ae2b..9e18b3097710f 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -75,7 +75,10 @@ class Styler: A unique identifier to avoid CSS collisions; generated automatically. uuid_len : int, default 5 If ``uuid`` is not specified, the length of the ``uuid`` to randomly generate - in characters, max is 32. + expressed in hex characters, in range [0, 32]. + + .. versionadded:: 1.2.0 + caption : str, default None Caption to attach to the table. table_attributes : str, default None @@ -163,7 +166,9 @@ def __init__( self.index = data.index self.columns = data.columns - self.uuid_len = uuid_len if uuid_len < 33 else 32 + if not isinstance(uuid_len, int) or not uuid_len >= 0: + raise TypeError("``uuid_len`` must be an integer in range [0, 32].") + self.uuid_len = min(32, uuid_len) self.uuid = (uuid or uuid4().hex[: self.uuid_len]) + "_" self.table_styles = table_styles self.caption = caption diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 987719757b8ee..8d66a16fc2b7a 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1718,23 +1718,25 @@ def test_colspan_w3(self): s = Styler(df, uuid="_", cell_ids=False) assert 'l0' in s.render() - @pytest.mark.parametrize("len_", [1, 5, 32]) + @pytest.mark.parametrize("len_", [1, 5, 32, 33, 100]) def test_uuid_len(self, len_): # GH 36345 df = pd.DataFrame(data=[["A"]]) s = Styler(df, uuid_len=len_, cell_ids=False).render() strt = s.find('id="T_') end = s[strt + 6 :].find('"') - assert end == len_ + 1 + if len_ > 32: + assert end == 32 + 1 + else: + assert end == len_ + 1 - @pytest.mark.parametrize("len_", [33, 100]) - def test_uuid_max_len(self, len_): + @pytest.mark.parametrize("len_", [-2, "bad", None]) + def test_uuid_len_raises(self, len_): # GH 36345 df = pd.DataFrame(data=[["A"]]) - s = Styler(df, uuid_len=len_, cell_ids=False).render() - strt = s.find('id="T_') - end = s[strt + 6 :].find('"') - assert end == 32 + 1 + msg = "``uuid_len`` must be an integer in range \\[0, 32\\]." + with pytest.raises(TypeError, match=msg): + Styler(df, uuid_len=len_, cell_ids=False).render() @td.skip_if_no_mpl From 910f059718f8c8aeb06cf52d6aff1ab7d3b44982 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 18 Sep 2020 07:14:34 +0200 Subject: [PATCH 6/6] arg at end of signature --- pandas/io/formats/style.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f30c1dc878999..1df37da3da8d0 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -73,12 +73,6 @@ class Styler: List of {selector: (attr, value)} dicts; see Notes. uuid : str, default None A unique identifier to avoid CSS collisions; generated automatically. - uuid_len : int, default 5 - If ``uuid`` is not specified, the length of the ``uuid`` to randomly generate - expressed in hex characters, in range [0, 32]. - - .. versionadded:: 1.2.0 - caption : str, default None Caption to attach to the table. table_attributes : str, default None @@ -95,6 +89,12 @@ class Styler: .. versionadded:: 1.0.0 + uuid_len : int, default 5 + If ``uuid`` is not specified, the length of the ``uuid`` to randomly generate + expressed in hex characters, in range [0, 32]. + + .. versionadded:: 1.2.0 + Attributes ---------- env : Jinja2 jinja2.Environment @@ -146,11 +146,11 @@ def __init__( precision: Optional[int] = None, table_styles: Optional[List[Dict[str, List[Tuple[str, str]]]]] = None, uuid: Optional[str] = None, - uuid_len: int = 5, caption: Optional[str] = None, table_attributes: Optional[str] = None, cell_ids: bool = True, na_rep: Optional[str] = None, + uuid_len: int = 5, ): self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list) self._todo: List[Tuple[Callable, Tuple, Dict]] = []