From a82f4f57eb4b874130f44781d9218c0d4699f8bf Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 14 Mar 2021 23:19:44 +0100 Subject: [PATCH 01/14] update escaping for html --- pandas/io/formats/style.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index cc5f3164385cb..959ed940fcf77 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -48,6 +48,7 @@ from pandas.core.indexes.api import Index jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") +from jinja2.filters import escape as escape_func BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -113,6 +114,10 @@ class Styler: .. versionadded:: 1.2.0 + escape : bool, default False + Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display + strings with HTML-safe sequences. + Attributes ---------- env : Jinja2 jinja2.Environment @@ -169,6 +174,7 @@ def __init__( cell_ids: bool = True, na_rep: Optional[str] = None, uuid_len: int = 5, + escape: bool = False, ): # validate ordered args if isinstance(data, pd.Series): @@ -201,7 +207,7 @@ def __init__( ] = defaultdict(lambda: partial(_default_formatter, precision=None)) self.precision = precision # can be removed on set_precision depr cycle self.na_rep = na_rep # can be removed on set_na_rep depr cycle - self.format(formatter=None, precision=precision, na_rep=na_rep) + self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape) def _repr_html_(self) -> str: """ @@ -547,6 +553,7 @@ def format( subset: Optional[Union[slice, Sequence[Any]]] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + escape: bool = False, ) -> Styler: """ Format the text display value of cells. @@ -570,6 +577,12 @@ def format( .. 
versionadded:: 1.3.0 + escape : bool, default False + Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display + string with HTML-safe sequences. Escaping is done before ``formatter``. + + .. versionadded:: 1.3.0 + Returns ------- self : Styler @@ -640,7 +653,15 @@ def format( 0 MISS 1.0000 STRING 1 2.0 MISS FLOAT """ - if all((formatter is None, subset is None, precision is None, na_rep is None)): + if all( + ( + formatter is None, + subset is None, + precision is None, + na_rep is None, + escape is False, + ) + ): self._display_funcs.clear() return self # clear the formatter / revert to default and avoid looping @@ -658,7 +679,7 @@ def format( except KeyError: format_func = None format_func = _maybe_wrap_formatter( - format_func, na_rep=na_rep, precision=precision + format_func, na_rep=na_rep, precision=precision, escape=escape ) for row, value in data[[col]].itertuples(): @@ -2154,6 +2175,7 @@ def _maybe_wrap_formatter( formatter: Optional[BaseFormatter] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + escape: bool = False, ) -> Callable: """ Allows formatters to be expressed as str, callable or None, where None returns @@ -2170,9 +2192,11 @@ def _maybe_wrap_formatter( raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") if na_rep is None: - return formatter_func + na_func = formatter_func else: - return lambda x: na_rep if pd.isna(x) else formatter_func(x) + na_func = lambda x: na_rep if pd.isna(x) else formatter_func(x) + + return lambda x: na_func(escape_func(x)) if escape else na_func def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: From 51bda2cddef54411a738f6098144970800734335 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 14 Mar 2021 23:04:42 +0100 Subject: [PATCH 02/14] update escaping for html --- pandas/io/formats/style.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 
959ed940fcf77..1705feff41d94 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -652,6 +652,17 @@ def format( 0 1 2 0 MISS 1.0000 STRING 1 2.0 MISS FLOAT + + Using a formatter with HTML ``escape``. + + >>> df = pd.DataFrame([['
', '"A&B"']]) + >>> s = df.style.format('{0}', escape=True) + >>> s.render() + ... + <div></div> + "A&B" + ... + """ if all( ( From a323f93d17f0229350ac9f2f519ff18d5c9b1e93 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 14 Mar 2021 23:34:27 +0100 Subject: [PATCH 03/14] tests for format_escape --- pandas/io/formats/style.py | 2 +- pandas/tests/io/formats/style/test_style.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 1705feff41d94..b39ffb1cb81c3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2207,7 +2207,7 @@ def _maybe_wrap_formatter( else: na_func = lambda x: na_rep if pd.isna(x) else formatter_func(x) - return lambda x: na_func(escape_func(x)) if escape else na_func + return (lambda x: na_func(escape_func(x))) if escape else na_func def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 977b92e217868..d0718e9e7f334 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -647,6 +647,17 @@ def test_format_clear(self): self.styler.format() assert (0, 0) not in self.styler._display_funcs # formatter cleared to default + def test_format_escape(self): + df = DataFrame([['<>&"']]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False) + ex = 'X&<>&">X' + assert ex in s.render() + + # only the value should be escaped before passing to the formatter + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True) + ex = 'X&<>&">X' + assert ex in s.render() + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." 
From e01eeb51a33ada34235185ccb482413d3c081b80 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 07:34:22 +0100 Subject: [PATCH 04/14] check and docs fix --- pandas/io/formats/style.py | 23 +++++++++++---------- pandas/tests/io/formats/style/test_style.py | 13 ++++++++++-- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c5eb8306c0b70..607d2088a31ce 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -48,7 +48,7 @@ from pandas.core.indexes.api import Index jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") -from jinja2.filters import escape as escape_func +from markupsafe import escape as escape_func # markupsafe is jinja2 dependency BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -624,7 +624,7 @@ def format( 0 MISS 1.000 A 1 2.000 MISS 3.000 - Using a format specification on consistent column dtypes + Using a ``formatter`` specification on consistent column dtypes >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) 0 1 2 @@ -647,7 +647,7 @@ def format( 0 MISS 1.00 A 1 2.0 PASS 3.00 - Using a callable formatting function + Using a callable ``formatter`` function. >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS') @@ -655,16 +655,16 @@ def format( 0 MISS 1.0000 STRING 1 2.0 MISS FLOAT - Using a formatter with HTML ``escape``. + Using a ``formatter`` with HTML ``escape`` and ``na_rep``. - >>> df = pd.DataFrame([['
', '"A&B"']]) - >>> s = df.style.format('{0}', escape=True) + >>> df = pd.DataFrame([['
', '"A&B"', None]]) + >>> s = df.style.format('{0}', escape=True, na_rep="NA") >>> s.render() ... <div></div> "A&B" + NA ... - """ if all( ( @@ -2204,11 +2204,12 @@ def _maybe_wrap_formatter( raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") if na_rep is None: - na_func = formatter_func + return (lambda x: formatter_func(escape_func(x))) if escape else formatter_func else: - na_func = lambda x: na_rep if pd.isna(x) else formatter_func(x) - - return (lambda x: na_func(escape_func(x))) if escape else na_func + if escape: + return lambda x: na_rep if pd.isna(x) else formatter_func(escape_func(x)) + else: + return lambda x: na_rep if pd.isna(x) else formatter_func(x) def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index d0718e9e7f334..272afb34ca3cf 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -650,14 +650,23 @@ def test_format_clear(self): def test_format_escape(self): df = DataFrame([['<>&"']]) s = Styler(df, uuid_len=0).format("X&{0}>X", escape=False) - ex = 'X&<>&">X' - assert ex in s.render() + expected = 'X&<>&">X' + assert expected in s.render() # only the value should be escaped before passing to the formatter s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True) ex = 'X&<>&">X' assert ex in s.render() + def test_format_escape_na_rep(self): + # tests the na_rep is not escaped + df = DataFrame([['<>&"', None]]) + s = Styler(df, uuid_len=0).format("X&{0}>X", escape=True, na_rep="&") + ex = 'X&<>&">X' + expected2 = '&' + assert ex in s.render() + assert expected2 in s.render() + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." 
From 2348165f891a821fc6bd9c705614d7055d11ebc6 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 08:36:24 +0100 Subject: [PATCH 05/14] versionadded and whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/io/formats/style.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index a9a5041e4a410..87f80ee29a95f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -135,6 +135,7 @@ Other enhancements - :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) +- :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`) - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files. diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 607d2088a31ce..3b4bf07741145 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -118,6 +118,8 @@ class Styler: Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display strings with HTML-safe sequences. + .. 
versionadded:: 1.3.0 + Attributes ---------- env : Jinja2 jinja2.Environment From 27f39ebdb72ed020094d2e4113319faba64fb122 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 14:40:16 +0100 Subject: [PATCH 06/14] only use escape on str --- pandas/io/formats/style.py | 8 ++++++-- pandas/tests/io/formats/style/test_style.py | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3b4bf07741145..78257fd72c3f9 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2205,11 +2205,15 @@ def _maybe_wrap_formatter( else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") + def _str_escape(x): + """only use escape_func on str, else return input""" + return escape_func(x) if isinstance(x, str) else x + if na_rep is None: - return (lambda x: formatter_func(escape_func(x))) if escape else formatter_func + return (lambda x: formatter_func(_str_escape(x))) if escape else formatter_func else: if escape: - return lambda x: na_rep if pd.isna(x) else formatter_func(escape_func(x)) + return lambda x: na_rep if pd.isna(x) else formatter_func(_str_escape(x)) else: return lambda x: na_rep if pd.isna(x) else formatter_func(x) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 272afb34ca3cf..cb1db27099cd1 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -667,6 +667,16 @@ def test_format_escape_na_rep(self): assert ex in s.render() assert expected2 in s.render() + def test_format_escape_floats(self): + # test given formatter for number format is not impacted by escape + s = self.df.style.format("{:.1f}", escape=True) + for expected in [">0.0<", ">1.0<", ">-1.2<", ">-0.6<"]: + assert expected in s.render() + # tests precision of floats is not impacted by escape + s = self.df.style.format(precision=1, escape=True) + 
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: + assert expected in s.render() + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." From c1b29c3c5edc43b17a60931995677de5324ef983 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 15 Mar 2021 17:41:23 +0100 Subject: [PATCH 07/14] refactor --- pandas/io/formats/style.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 78257fd72c3f9..60fb854928f72 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2205,17 +2205,19 @@ def _maybe_wrap_formatter( else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") - def _str_escape(x): - """only use escape_func on str, else return input""" - return escape_func(x) if isinstance(x, str) else x + def _str_escape(x, escape: bool): + """if escaping: only use on str, else return input""" + if escape and isinstance(x, str): + return escape_func(x) + else: + return x + + display_func = lambda x: formatter_func(partial(_str_escape, escape=escape)(x)) if na_rep is None: - return (lambda x: formatter_func(_str_escape(x))) if escape else formatter_func + return display_func else: - if escape: - return lambda x: na_rep if pd.isna(x) else formatter_func(_str_escape(x)) - else: - return lambda x: na_rep if pd.isna(x) else formatter_func(x) + return lambda x: na_rep if pd.isna(x) else display_func(x) def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: From 0008af2da39cbfe31d278b7f1be37cd520fc17ae Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Sun, 21 Mar 2021 23:03:37 +0100 Subject: [PATCH 08/14] add decimal and thousands options to format function --- pandas/io/formats/style.py | 85 +++++++++++++++++++-- pandas/tests/io/formats/style/test_style.py | 29 +++++++ 2 files changed, 107 insertions(+), 7 deletions(-) diff 
--git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 60fb854928f72..54d96289cc571 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -176,6 +176,8 @@ def __init__( cell_ids: bool = True, na_rep: Optional[str] = None, uuid_len: int = 5, + decimal: str = ".", + thousands: Optional[str] = None, escape: bool = False, ): # validate ordered args @@ -210,7 +212,14 @@ def __init__( ] = defaultdict(lambda: partial(_default_formatter, precision=def_precision)) self.precision = precision # can be removed on set_precision depr cycle self.na_rep = na_rep # can be removed on set_na_rep depr cycle - self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape) + self.format( + formatter=None, + precision=precision, + na_rep=na_rep, + escape=escape, + decimal=decimal, + thousands=thousands, + ) def _repr_html_(self) -> str: """ @@ -557,6 +566,8 @@ def format( subset: Optional[Union[slice, Sequence[Any]]] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + decimal: str = ".", + thousands: Optional[str] = None, escape: bool = False, ) -> Styler: """ @@ -581,6 +592,14 @@ def format( .. versionadded:: 1.3.0 + decimal : str, default "." + Character used as decimal separator for floats and complex. + + .. versionadded:: 1.3.0 + + thousands : str, optional, default None + Character used as thousands separtor for floats, complex and integers + escape : bool, default False Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display string with HTML-safe sequences. Escaping is done before ``formatter``. 
@@ -674,6 +693,8 @@ def format( subset is None, precision is None, na_rep is None, + decimal == ".", + thousands is None, escape is False, ) ): @@ -694,7 +715,12 @@ def format( except KeyError: format_func = None format_func = _maybe_wrap_formatter( - format_func, na_rep=na_rep, precision=precision, escape=escape + format_func, + na_rep=na_rep, + precision=precision, + escape=escape, + decimal=decimal, + thousands=thousands, ) for row, value in data[[col]].itertuples(): @@ -2163,7 +2189,7 @@ def _get_level_lengths(index, hidden_elements=None): return non_zero_lengths -def _default_formatter(x: Any, precision: int) -> Any: +def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any: """ Format the display of a value @@ -2173,21 +2199,62 @@ def _default_formatter(x: Any, precision: int) -> Any: Input variable to be formatted precision : Int Floating point precision used if ``x`` is float or complex. + thousands : bool + Whether to group digits with thousands separated with ",". Returns ------- value : Any - Matches input type, or string if input is float or complex. + Matches input type, or string if input is float or complex or int with sep. """ if isinstance(x, (float, complex)): + if thousands: + return f"{x:,.{precision}f}" return f"{x:.{precision}f}" + elif isinstance(x, int) and thousands: + return f"{x:,.0f}" return x +def _maybe_wrap_deci_thou( + formatter: Callable, decimal: str = ".", thousands: Optional[str] = None +) -> Callable: + """ + Takes a string formatting function and wraps logic to deal with thousands and + decimal parameters, in the case that they are non-standard and that the input + is a (float, complex). + """ + + def wrapper(x): + std_thou = thousands is None or thousands == "," + std_deci = decimal == "." 
+ if isinstance(x, (float, complex)): + if std_thou and std_deci: + return formatter(x) + elif std_thou and not std_deci: + return formatter(x).replace(".", decimal) + elif not std_thou and std_deci: + return formatter(x).replace(",", thousands) + else: + return ( + formatter(x) + .replace(",", "§_§-") # rare string to avoid "," <-> "." clash. + .replace(".", decimal) + .replace("§_§-", thousands) + ) + elif isinstance(x, int) and not std_thou: + return formatter(x).replace(",", thousands) + return formatter(x) + + return wrapper + + def _maybe_wrap_formatter( formatter: Optional[BaseFormatter] = None, na_rep: Optional[str] = None, precision: Optional[int] = None, + decimal: str = ".", + thousands: Optional[str] = None, escape: bool = False, ) -> Callable: """ @@ -2196,15 +2263,19 @@ def _maybe_wrap_formatter( available. """ if isinstance(formatter, str): - formatter_func = lambda x: formatter.format(x) + func = lambda x: formatter.format(x) elif callable(formatter): - formatter_func = formatter + func = formatter elif formatter is None: precision = get_option("display.precision") if precision is None else precision - formatter_func = partial(_default_formatter, precision=precision) + func = partial( + _default_formatter, precision=precision, thousands=(thousands is not None) + ) else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") + formatter_func = _maybe_wrap_deci_thou(func, decimal=decimal, thousands=thousands) + def _str_escape(x, escape: bool): """if escaping: only use on str, else return input""" if escape and isinstance(x, str): diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index cb1db27099cd1..0317e6fc4fc4e 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -677,6 +677,35 @@ def test_format_escape_floats(self): for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: assert expected in s.render() + def 
test_format_decimal_thousands(self): + # tests formatter handles decimals and thousands separator + + s = DataFrame([[1000000.123456789]]).style # test float + result = s.format(thousands=",")._translate() + assert result["body"][0][1]["display_value"] == "1,000,000.123457" + result = s.format(thousands=",", decimal=".")._translate() + assert result["body"][0][1]["display_value"] == "1,000,000.123457" + result = s.format(thousands=",", decimal=".", precision=3)._translate() + assert result["body"][0][1]["display_value"] == "1,000,000.123" + result = s.format(decimal="_", precision=3)._translate() + assert result["body"][0][1]["display_value"] == "1000000_123" + result = s.format(thousands=" ", decimal="_", precision=3)._translate() + assert result["body"][0][1]["display_value"] == "1 000 000_123" + result = s.format("{:,.4f}", thousands=" ", decimal="_")._translate() + assert result["body"][0][1]["display_value"] == "1 000 000_1235" + result = s.format( + "
{:,.4f}
", thousands=" ", decimal="_", escape=True + )._translate() + assert result["body"][0][1]["display_value"] == "
1 000 000_1235
" + + s = DataFrame([[1000000]]).style # test int + result = s.format(thousands=" ")._translate() + assert result["body"][0][1]["display_value"] == "1 000 000" + result = s.format( + "
{:,.0f}
", thousands=" ", decimal="_", escape=True + )._translate() + assert result["body"][0][1]["display_value"] == "
1 000 000
" + def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." From 02ba51eb64cd4fc6c365cef93f59debac6fa2218 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Sun, 21 Mar 2021 23:12:15 +0100 Subject: [PATCH 09/14] add decimal and thousands options to format function --- pandas/io/formats/style.py | 18 ++++++++++++++---- pandas/tests/io/formats/style/test_style.py | 4 ++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 54d96289cc571..044b199aa2ce0 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -114,6 +114,16 @@ class Styler: .. versionadded:: 1.2.0 + decimal : str, default "." + Character used as decimal separator for floats, complex and integers + + .. versionadded:: 1.3.0 + + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers + + .. versionadded:: 1.3.0 + escape : bool, default False Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display strings with HTML-safe sequences. @@ -598,7 +608,9 @@ def format( .. versionadded:: 1.3.0 thousands : str, optional, default None - Character used as thousands separtor for floats, complex and integers + Character used as thousands separator for floats, complex and integers + + .. versionadded:: 1.3.0 escape : bool, default False Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display @@ -2228,7 +2240,7 @@ def _maybe_wrap_deci_thou( def wrapper(x): std_thou = thousands is None or thousands == "," std_deci = decimal == "." 
- if isinstance(x, (float, complex)): + if isinstance(x, (float, complex, int)): if std_thou and std_deci: return formatter(x) elif std_thou and not std_deci: @@ -2242,8 +2254,6 @@ def wrapper(x): .replace(".", decimal) .replace("§_§-", thousands) ) - elif isinstance(x, int) and not std_thou: - return formatter(x).replace(",", thousands) return formatter(x) return wrapper diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 0317e6fc4fc4e..0e83fb18d7767 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -701,6 +701,10 @@ def test_format_decimal_thousands(self): s = DataFrame([[1000000]]).style # test int result = s.format(thousands=" ")._translate() assert result["body"][0][1]["display_value"] == "1 000 000" + result = s.format(thousands=" ", decimal="*")._translate() + assert result["body"][0][1]["display_value"] == "1 000 000" + result = s.format("{:,.1f}", thousands=" ", decimal="*")._translate() + assert result["body"][0][1]["display_value"] == "1 000 000*0" result = s.format( "
{:,.0f}
", thousands=" ", decimal="_", escape=True )._translate() From b07de093e1a06a2a3006e4e8fb24cc45493e0b57 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Sun, 21 Mar 2021 23:40:44 +0100 Subject: [PATCH 10/14] docs --- pandas/io/formats/style.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 044b199aa2ce0..98161d03d4c12 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -636,7 +636,9 @@ def format( The default formatter currently expresses floats and complex numbers with the pandas display precision unless using the ``precision`` argument here. The default formatter does not adjust the representation of missing values unless - the ``na_rep`` argument is used. + the ``na_rep`` argument is used. The default formatter will alter the display + of ``decimal`` and ``thousands`` separators only if the respective arguments + are used. The ``subset`` argument defines which region to apply the formatting function to. If the ``formatter`` argument is given in dict form but does not include @@ -698,6 +700,19 @@ def format( "A&B" NA ... + + Using a given ``formatter`` and the default formatter with ``decimal`` and + ``thousands``. 
+ + >>> df = pd.DataFrame([[1000, 10000], [100.123, 1000000]]) + >>> s = df.style.format(precision=2, thousands="-", decimal="*") + 0 1 + 0 1-000*00 10-000 + 1 100*12 1-000-000 + >>> s = df.style.format({0: "{:,.1f}", 1: "{:.0f}"}, thousands=" ", decimal=",") + 0 1 + 0 1 000,0 10000 + 1 100,1 1000000 """ if all( ( From 2202a7cb8bd12170cc52af059045ae52b28dbebd Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 23 Mar 2021 21:41:20 +0100 Subject: [PATCH 11/14] tests with parameters --- pandas/tests/io/formats/style/test_style.py | 55 +++++++++++---------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 77862f9a9f834..bf7e547cd252a 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -678,38 +678,43 @@ def test_format_escape_floats(self): for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]: assert expected in s.render() - def test_format_decimal_thousands(self): - # tests formatter handles decimals and thousands separator - + @pytest.mark.parametrize("formatter", [None, "{:,.1f}"]) + @pytest.mark.parametrize("decimal", [".", "*"]) + @pytest.mark.parametrize("precision", [None, 2]) + def test_format_thousands(self, formatter, decimal, precision): s = DataFrame([[1000000.123456789]]).style # test float - result = s.format(thousands=",")._translate() - assert result["body"][0][1]["display_value"] == "1,000,000.123457" - result = s.format(thousands=",", decimal=".")._translate() - assert result["body"][0][1]["display_value"] == "1,000,000.123457" - result = s.format(thousands=",", decimal=".", precision=3)._translate() - assert result["body"][0][1]["display_value"] == "1,000,000.123" - result = s.format(decimal="_", precision=3)._translate() - assert result["body"][0][1]["display_value"] == "1000000_123" - result = s.format(thousands=" ", decimal="_", precision=3)._translate() - assert 
result["body"][0][1]["display_value"] == "1 000 000_123" - result = s.format("{:,.4f}", thousands=" ", decimal="_")._translate() - assert result["body"][0][1]["display_value"] == "1 000 000_1235" result = s.format( - "
{:,.4f}
", thousands=" ", decimal="_", escape=True + thousands="_", formatter=formatter, decimal=decimal, precision=precision )._translate() - assert result["body"][0][1]["display_value"] == "
1 000 000_1235
" + assert "1_000_000" in result["body"][0][1]["display_value"] s = DataFrame([[1000000]]).style # test int - result = s.format(thousands=" ")._translate() - assert result["body"][0][1]["display_value"] == "1 000 000" - result = s.format(thousands=" ", decimal="*")._translate() - assert result["body"][0][1]["display_value"] == "1 000 000" - result = s.format("{:,.1f}", thousands=" ", decimal="*")._translate() - assert result["body"][0][1]["display_value"] == "1 000 000*0" result = s.format( - "
{:,.0f}
", thousands=" ", decimal="_", escape=True + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate() + assert "1_000_000" in result["body"][0][1]["display_value"] + + s = DataFrame([[1 + 1000000.123456789j]]).style # test complex + result = s.format( + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate() + assert "1_000_000" in result["body"][0][1]["display_value"] + + @pytest.mark.parametrize("formatter", [None, "{:,.4f}"]) + @pytest.mark.parametrize("thousands", [None, ",", "*"]) + @pytest.mark.parametrize("precision", [None, 4]) + def test_format_decimal(self, formatter, thousands, precision): + s = DataFrame([[1000000.123456789]]).style # test float + result = s.format( + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate() + assert "000_123" in result["body"][0][1]["display_value"] + + s = DataFrame([[1 + 1000000.123456789j]]).style # test complex + result = s.format( + decimal="_", formatter=formatter, thousands=thousands, precision=precision )._translate() - assert result["body"][0][1]["display_value"] == "
1 000 000
" + assert "000_123" in result["body"][0][1]["display_value"] def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) From 45dd2f10a98837d8c58c4ce0ab4bad29f4e4dfc7 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 23 Mar 2021 21:48:31 +0100 Subject: [PATCH 12/14] var names --- pandas/io/formats/style.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index eff9b8b75899c..dfb13a536af09 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2291,14 +2291,14 @@ def _maybe_wrap_deci_thou( """ def wrapper(x): - std_thou = thousands is None or thousands == "," - std_deci = decimal == "." + default_thousands = thousands is None or thousands == "," + default_decimal = decimal == "." if isinstance(x, (float, complex, int)): - if std_thou and std_deci: + if default_thousands and default_decimal: return formatter(x) - elif std_thou and not std_deci: + elif default_thousands and not default_decimal: return formatter(x).replace(".", decimal) - elif not std_thou and std_deci: + elif not default_thousands and default_decimal: return formatter(x).replace(",", thousands) else: return ( From 863dd6c8cf3b12bad38e31706d63f07ed352d0e1 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Sat, 27 Mar 2021 18:29:20 +0100 Subject: [PATCH 13/14] move format tests --- pandas/tests/io/formats/style/test_format.py | 40 ++++++++++++++++++++ pandas/tests/io/formats/style/test_style.py | 38 ------------------- 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 09b18e1f71d76..0f3e5863a4a99 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -197,3 +197,43 @@ def test_format_subset(): assert ctx["body"][1][1]["display_value"] == "1.1" assert ctx["body"][0][2]["display_value"] == 
"0.123400" assert ctx["body"][1][2]["display_value"] == raw_11 + + +@pytest.mark.parametrize("formatter", [None, "{:,.1f}"]) +@pytest.mark.parametrize("decimal", [".", "*"]) +@pytest.mark.parametrize("precision", [None, 2]) +def test_format_thousands(formatter, decimal, precision): + s = DataFrame([[1000000.123456789]]).style # test float + result = s.format( + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate() + assert "1_000_000" in result["body"][0][1]["display_value"] + + s = DataFrame([[1000000]]).style # test int + result = s.format( + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate() + assert "1_000_000" in result["body"][0][1]["display_value"] + + s = DataFrame([[1 + 1000000.123456789j]]).style # test complex + result = s.format( + thousands="_", formatter=formatter, decimal=decimal, precision=precision + )._translate() + assert "1_000_000" in result["body"][0][1]["display_value"] + + +@pytest.mark.parametrize("formatter", [None, "{:,.4f}"]) +@pytest.mark.parametrize("thousands", [None, ",", "*"]) +@pytest.mark.parametrize("precision", [None, 4]) +def test_format_decimal(formatter, thousands, precision): + s = DataFrame([[1000000.123456789]]).style # test float + result = s.format( + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate() + assert "000_123" in result["body"][0][1]["display_value"] + + s = DataFrame([[1 + 1000000.123456789j]]).style # test complex + result = s.format( + decimal="_", formatter=formatter, thousands=thousands, precision=precision + )._translate() + assert "000_123" in result["body"][0][1]["display_value"] diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 8aa35c571a9e0..302019b702829 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -601,44 +601,6 @@ def test_set_na_rep(self): assert 
ctx["body"][0][1]["display_value"] == "NA" assert ctx["body"][0][2]["display_value"] == "-" - @pytest.mark.parametrize("formatter", [None, "{:,.1f}"]) - @pytest.mark.parametrize("decimal", [".", "*"]) - @pytest.mark.parametrize("precision", [None, 2]) - def test_format_thousands(self, formatter, decimal, precision): - s = DataFrame([[1000000.123456789]]).style # test float - result = s.format( - thousands="_", formatter=formatter, decimal=decimal, precision=precision - )._translate() - assert "1_000_000" in result["body"][0][1]["display_value"] - - s = DataFrame([[1000000]]).style # test int - result = s.format( - thousands="_", formatter=formatter, decimal=decimal, precision=precision - )._translate() - assert "1_000_000" in result["body"][0][1]["display_value"] - - s = DataFrame([[1 + 1000000.123456789j]]).style # test complex - result = s.format( - thousands="_", formatter=formatter, decimal=decimal, precision=precision - )._translate() - assert "1_000_000" in result["body"][0][1]["display_value"] - - @pytest.mark.parametrize("formatter", [None, "{:,.4f}"]) - @pytest.mark.parametrize("thousands", [None, ",", "*"]) - @pytest.mark.parametrize("precision", [None, 4]) - def test_format_decimal(self, formatter, thousands, precision): - s = DataFrame([[1000000.123456789]]).style # test float - result = s.format( - decimal="_", formatter=formatter, thousands=thousands, precision=precision - )._translate() - assert "000_123" in result["body"][0][1]["display_value"] - - s = DataFrame([[1 + 1000000.123456789j]]).style # test complex - result = s.format( - decimal="_", formatter=formatter, thousands=thousands, precision=precision - )._translate() - assert "000_123" in result["body"][0][1]["display_value"] - def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) msg = "style is not supported for non-unique indices." 
From a0b7fd6084884c957e5b6d2e3721bd2d2ce3e2ee Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 13 Apr 2021 20:04:13 +0200 Subject: [PATCH 14/14] restructure formatter wrappers --- pandas/io/formats/style_render.py | 101 +++++++++++++++++++++++++----- 1 file changed, 85 insertions(+), 16 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 45b1d97b9694c..15557c993eab4 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -38,7 +38,7 @@ import pandas.core.common as com jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") -from markupsafe import escape as escape_func # markupsafe is jinja2 dependency +from markupsafe import escape as escape_html # markupsafe is jinja2 dependency BaseFormatter = Union[str, Callable] ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] @@ -366,6 +366,8 @@ def format( subset: slice | Sequence[Any] | None = None, na_rep: str | None = None, precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, escape: bool = False, ) -> StylerRenderer: """ @@ -390,6 +392,16 @@ def format( .. versionadded:: 1.3.0 + decimal : str, default "." + Character used as decimal separator for floats, complex and integers + + .. versionadded:: 1.3.0 + + thousands : str, optional, default None + Character used as thousands separator for floats, complex and integers + + .. versionadded:: 1.3.0 + escape : bool, default False Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display string with HTML-safe sequences. Escaping is done before ``formatter``. 
@@ -482,6 +494,8 @@ def format( formatter is None, subset is None, precision is None, + decimal == ".", + thousands is None, na_rep is None, escape is False, ) @@ -502,8 +516,14 @@ def format( format_func = formatter[col] except KeyError: format_func = None + format_func = _maybe_wrap_formatter( - format_func, na_rep=na_rep, precision=precision, escape=escape + format_func, + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, ) for row, value in data[[col]].itertuples(): @@ -607,7 +627,7 @@ def _format_table_styles(styles: CSSStyles) -> CSSStyles: ] -def _default_formatter(x: Any, precision: int) -> Any: +def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any: """ Format the display of a value @@ -617,14 +637,54 @@ def _default_formatter(x: Any, precision: int) -> Any: Input variable to be formatted precision : Int Floating point precision used if ``x`` is float or complex. + thousands : bool, default False + Whether to group digits with thousands separated with ",". Returns ------- value : Any - Matches input type, or string if input is float or complex. + Matches input type, or string if input is float or complex or int with sep. """ if isinstance(x, (float, complex)): + if thousands: + return f"{x:,.{precision}f}" return f"{x:.{precision}f}" + elif isinstance(x, int) and thousands: + return f"{x:,.0f}" + return x + + +def _wrap_decimal_thousands( + formatter: Callable, decimal: str, thousands: str | None +) -> Callable: + """ + Takes a string formatting function and wraps logic to deal with thousands and + decimal parameters, in the case that they are non-standard and that the input + is a (float, complex, int). + """ + + def wrapper(x): + if isinstance(x, (float, complex, int)): + if decimal != "." and thousands is not None and thousands != ",": + return ( + formatter(x) + .replace(",", "§_§-") # rare string to avoid "," <-> "." clash. 
+ .replace(".", decimal) + .replace("§_§-", thousands) + ) + elif decimal != "." and (thousands is None or thousands == ","): + return formatter(x).replace(".", decimal) + elif decimal == "." and thousands is not None and thousands != ",": + return formatter(x).replace(",", thousands) + return formatter(x) + + return wrapper + + +def _str_escape_html(x): + """if escaping html: only use on str, else return input""" + if isinstance(x, str): + return escape_html(x) return x @@ -632,6 +692,8 @@ def _maybe_wrap_formatter( formatter: BaseFormatter | None = None, na_rep: str | None = None, precision: int | None = None, + decimal: str = ".", + thousands: str | None = None, escape: bool = False, ) -> Callable: """ @@ -639,29 +701,36 @@ def _maybe_wrap_formatter( a default formatting function. wraps with na_rep, and precision where they are available. """ + # Get initial func from input string, input callable, or from default factory if isinstance(formatter, str): - formatter_func = lambda x: formatter.format(x) + func_0 = lambda x: formatter.format(x) elif callable(formatter): - formatter_func = formatter + func_0 = formatter elif formatter is None: precision = get_option("display.precision") if precision is None else precision - formatter_func = partial(_default_formatter, precision=precision) + func_0 = partial( + _default_formatter, precision=precision, thousands=(thousands is not None) + ) else: raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") - def _str_escape(x, escape: bool): - """if escaping: only use on str, else return input""" - if escape and isinstance(x, str): - return escape_func(x) - else: - return x + # Replace HTML chars if escaping + if escape: + func_1 = lambda x: func_0(_str_escape_html(x)) + else: + func_1 = func_0 - display_func = lambda x: formatter_func(partial(_str_escape, escape=escape)(x)) + # Replace decimals and thousands if non-standard inputs detected + if decimal != "." 
or (thousands is not None and thousands != ","): + func_2 = _wrap_decimal_thousands(func_1, decimal=decimal, thousands=thousands) + else: + func_2 = func_1 + # Replace missing values if na_rep if na_rep is None: - return display_func + return func_2 else: - return lambda x: na_rep if isna(x) else display_func(x) + return lambda x: na_rep if isna(x) else func_2(x) def non_reducing_slice(slice_):