Skip to content

ENH: add decimal and thousands args to Styler.format() #40596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Apr 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a82f4f5
update escaping for html
attack68 Mar 14, 2021
51bda2c
update escaping for html
attack68 Mar 14, 2021
a323f93
tests for format_escape
attack68 Mar 14, 2021
64a54d3
Merge remote-tracking branch 'upstream/master' into escape_html_forma…
attack68 Mar 15, 2021
e01eeb5
check and docs fix
attack68 Mar 15, 2021
2348165
versionadded and whatsnew
attack68 Mar 15, 2021
27f39eb
only use escape on str
attack68 Mar 15, 2021
c1b29c3
refactor
attack68 Mar 15, 2021
0008af2
add decimal and thousands options to format function
attack68 Mar 21, 2021
02ba51e
add decimal and thousands options to format function
attack68 Mar 21, 2021
b07de09
docs
attack68 Mar 21, 2021
8ea4e0a
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Mar 23, 2021
2202a7c
tests with parameters
attack68 Mar 23, 2021
45dd2f1
var names
attack68 Mar 23, 2021
fb5dc2f
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Mar 27, 2021
863dd6c
move format tests
attack68 Mar 27, 2021
131b928
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Mar 31, 2021
b82e0b1
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 13, 2021
a0b7fd6
restructure formatter wrappers
attack68 Apr 13, 2021
c35b1ee
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 14, 2021
70f1e2b
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 15, 2021
d834f8f
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 18, 2021
19d862b
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ class Styler(StylerRenderer):

.. versionadded:: 1.2.0

decimal : str, default "."
Character used as decimal separator for floats, complex and integers

.. versionadded:: 1.3.0

thousands : str, optional, default None
Character used as thousands separator for floats, complex and integers

.. versionadded:: 1.3.0

escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
strings with HTML-safe sequences.
Expand Down Expand Up @@ -160,6 +170,8 @@ def __init__(
cell_ids: bool = True,
na_rep: str | None = None,
uuid_len: int = 5,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
):
super().__init__(
Expand All @@ -175,7 +187,14 @@ def __init__(
# validate ordered args
self.precision = precision # can be removed on set_precision depr cycle
self.na_rep = na_rep # can be removed on set_na_rep depr cycle
self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape)
self.format(
formatter=None,
precision=precision,
na_rep=na_rep,
escape=escape,
decimal=decimal,
thousands=thousands,
)

def _repr_html_(self) -> str:
"""
Expand Down
101 changes: 85 additions & 16 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import pandas.core.common as com

jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
from markupsafe import escape as escape_func # markupsafe is jinja2 dependency
from markupsafe import escape as escape_html # markupsafe is jinja2 dependency

BaseFormatter = Union[str, Callable]
ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]]
Expand Down Expand Up @@ -366,6 +366,8 @@ def format(
subset: slice | Sequence[Any] | None = None,
na_rep: str | None = None,
precision: int | None = None,
decimal: str = ".",
thousands: str | None = None,
escape: bool = False,
) -> StylerRenderer:
"""
Expand All @@ -390,6 +392,16 @@ def format(

.. versionadded:: 1.3.0

decimal : str, default "."
Character used as decimal separator for floats, complex and integers

.. versionadded:: 1.3.0

thousands : str, optional, default None
Character used as thousands separator for floats, complex and integers

.. versionadded:: 1.3.0

escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
string with HTML-safe sequences. Escaping is done before ``formatter``.
Expand Down Expand Up @@ -482,6 +494,8 @@ def format(
formatter is None,
subset is None,
precision is None,
decimal == ".",
thousands is None,
na_rep is None,
escape is False,
)
Expand All @@ -502,8 +516,14 @@ def format(
format_func = formatter[col]
except KeyError:
format_func = None

format_func = _maybe_wrap_formatter(
format_func, na_rep=na_rep, precision=precision, escape=escape
format_func,
na_rep=na_rep,
precision=precision,
decimal=decimal,
thousands=thousands,
escape=escape,
)

for row, value in data[[col]].itertuples():
Expand Down Expand Up @@ -607,7 +627,7 @@ def _format_table_styles(styles: CSSStyles) -> CSSStyles:
]


def _default_formatter(x: Any, precision: int) -> Any:
def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any:
"""
Format the display of a value

Expand All @@ -617,51 +637,100 @@ def _default_formatter(x: Any, precision: int) -> Any:
Input variable to be formatted
precision : Int
Floating point precision used if ``x`` is float or complex.
thousands : bool, default False
Whether to group digits with thousands separated with ",".

Returns
-------
value : Any
Matches input type, or string if input is float or complex.
Matches input type, or string if input is float or complex or int with sep.
"""
if isinstance(x, (float, complex)):
if thousands:
return f"{x:,.{precision}f}"
return f"{x:.{precision}f}"
elif isinstance(x, int) and thousands:
return f"{x:,.0f}"
return x


def _wrap_decimal_thousands(
formatter: Callable, decimal: str, thousands: str | None
) -> Callable:
"""
Takes a string formatting function and wraps logic to deal with thousands and
decimal parameters, in the case that they are non-standard and that the input
is a (float, complex, int).
"""

def wrapper(x):
if isinstance(x, (float, complex, int)):
if decimal != "." and thousands is not None and thousands != ",":
return (
formatter(x)
.replace(",", "§_§-") # rare string to avoid "," <-> "." clash.
.replace(".", decimal)
.replace("§_§-", thousands)
)
elif decimal != "." and (thousands is None or thousands == ","):
return formatter(x).replace(".", decimal)
elif decimal == "." and thousands is not None and thousands != ",":
return formatter(x).replace(",", thousands)
return formatter(x)

return wrapper


def _str_escape_html(x):
"""if escaping html: only use on str, else return input"""
if isinstance(x, str):
return escape_html(x)
return x


def _maybe_wrap_formatter(
    formatter: BaseFormatter | None = None,
    na_rep: str | None = None,
    precision: int | None = None,
    decimal: str = ".",
    thousands: str | None = None,
    escape: bool = False,
) -> Callable:
    """
    Build the display function for a cell from a str, callable or None formatter.

    Parameters
    ----------
    formatter : str, callable or None
        A format string (used via ``str.format``), a callable taking the cell
        value, or None to use the default formatter.
    na_rep : str or None
        If given, returned instead of the formatted value whenever ``isna(x)``.
    precision : int or None
        Precision for the default formatter; None falls back to the
        ``display.precision`` option. Only used when ``formatter`` is None.
    decimal : str, default "."
        Character used as decimal separator.
    thousands : str or None, default None
        Character used as thousands separator.
    escape : bool, default False
        If True, str inputs are HTML-escaped before being passed to the formatter.

    Returns
    -------
    Callable
        Function mapping a cell value to its display value.

    Raises
    ------
    TypeError
        If ``formatter`` is not a str, a callable or None.
    """
    # Get initial func from input string, input callable, or from default factory
    if isinstance(formatter, str):
        base_func = lambda x: formatter.format(x)
    elif callable(formatter):
        base_func = formatter
    elif formatter is None:
        precision = get_option("display.precision") if precision is None else precision
        base_func = partial(
            _default_formatter, precision=precision, thousands=(thousands is not None)
        )
    else:
        raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")

    # Replace HTML chars if escaping (applied to the input, before formatting)
    if escape:
        escaped_func = lambda x: base_func(_str_escape_html(x))
    else:
        escaped_func = base_func

    # Replace decimals and thousands if non-standard inputs detected
    if decimal != "." or (thousands is not None and thousands != ","):
        display_func = _wrap_decimal_thousands(
            escaped_func, decimal=decimal, thousands=thousands
        )
    else:
        display_func = escaped_func

    # Replace missing values if na_rep
    if na_rep is None:
        return display_func
    return lambda x: na_rep if isna(x) else display_func(x)


def non_reducing_slice(slice_):
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/io/formats/style/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,43 @@ def test_format_subset():
assert ctx["body"][1][1]["display_value"] == "1.1"
assert ctx["body"][0][2]["display_value"] == "0.123400"
assert ctx["body"][1][2]["display_value"] == raw_11


@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
def test_format_thousands(formatter, decimal, precision):
    """A custom thousands separator shows up for float, int and complex cells."""
    values = (
        1000000.123456789,  # test float
        1000000,  # test int
        1 + 1000000.123456789j,  # test complex
    )
    for value in values:
        styler = DataFrame([[value]]).style
        ctx = styler.format(
            thousands="_", formatter=formatter, decimal=decimal, precision=precision
        )._translate()
        assert "1_000_000" in ctx["body"][0][1]["display_value"]


@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
def test_format_decimal(formatter, thousands, precision):
    """A custom decimal separator shows up for float and complex cells."""
    values = (
        1000000.123456789,  # test float
        1 + 1000000.123456789j,  # test complex
    )
    for value in values:
        styler = DataFrame([[value]]).style
        ctx = styler.format(
            decimal="_", formatter=formatter, thousands=thousands, precision=precision
        )._translate()
        assert "000_123" in ctx["body"][0][1]["display_value"]