Skip to content

ENH: add decimal and thousands args to Styler.format() #40596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Apr 20, 2021
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a82f4f5
update escaping for html
attack68 Mar 14, 2021
51bda2c
update escaping for html
attack68 Mar 14, 2021
a323f93
tests for format_escape
attack68 Mar 14, 2021
64a54d3
Merge remote-tracking branch 'upstream/master' into escape_html_forma…
attack68 Mar 15, 2021
e01eeb5
check and docs fix
attack68 Mar 15, 2021
2348165
versionadded and whatsnew
attack68 Mar 15, 2021
27f39eb
only use escape on str
attack68 Mar 15, 2021
c1b29c3
refactor
attack68 Mar 15, 2021
0008af2
add decimal and thousands options to format function
attack68 Mar 21, 2021
02ba51e
add decimal and thousands options to format function
attack68 Mar 21, 2021
b07de09
docs
attack68 Mar 21, 2021
8ea4e0a
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Mar 23, 2021
2202a7c
tests with parameters
attack68 Mar 23, 2021
45dd2f1
var names
attack68 Mar 23, 2021
fb5dc2f
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Mar 27, 2021
863dd6c
move format tests
attack68 Mar 27, 2021
131b928
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Mar 31, 2021
b82e0b1
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 13, 2021
a0b7fd6
restructure formatter wrappers
attack68 Apr 13, 2021
c35b1ee
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 14, 2021
70f1e2b
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 15, 2021
d834f8f
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 18, 2021
19d862b
Merge remote-tracking branch 'upstream/master' into format_decimal_th…
attack68 Apr 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 104 additions & 8 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ class Styler:

.. versionadded:: 1.2.0

decimal : str, default "."
Character used as decimal separator for floats, complex and integers

.. versionadded:: 1.3.0

thousands : str, optional, default None
Character used as thousands separator for floats, complex and integers

.. versionadded:: 1.3.0

escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
strings with HTML-safe sequences.
Expand Down Expand Up @@ -176,6 +186,8 @@ def __init__(
cell_ids: bool = True,
na_rep: Optional[str] = None,
uuid_len: int = 5,
decimal: str = ".",
thousands: Optional[str] = None,
escape: bool = False,
):
# validate ordered args
Expand Down Expand Up @@ -210,7 +222,14 @@ def __init__(
] = defaultdict(lambda: partial(_default_formatter, precision=def_precision))
self.precision = precision # can be removed on set_precision depr cycle
self.na_rep = na_rep # can be removed on set_na_rep depr cycle
self.format(formatter=None, precision=precision, na_rep=na_rep, escape=escape)
self.format(
formatter=None,
precision=precision,
na_rep=na_rep,
escape=escape,
decimal=decimal,
thousands=thousands,
)

def _repr_html_(self) -> str:
"""
Expand Down Expand Up @@ -552,6 +571,8 @@ def format(
subset: Optional[Union[slice, Sequence[Any]]] = None,
na_rep: Optional[str] = None,
precision: Optional[int] = None,
decimal: str = ".",
thousands: Optional[str] = None,
escape: bool = False,
) -> Styler:
"""
Expand All @@ -576,6 +597,16 @@ def format(

.. versionadded:: 1.3.0

decimal : str, default "."
Character used as decimal separator for floats, complex and integers.

.. versionadded:: 1.3.0

thousands : str, optional, default None
Character used as thousands separator for floats, complex and integers

.. versionadded:: 1.3.0

escape : bool, default False
Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in cell display
string with HTML-safe sequences. Escaping is done before ``formatter``.
Expand All @@ -600,7 +631,9 @@ def format(
The default formatter currently expresses floats and complex numbers with the
pandas display precision unless using the ``precision`` argument here. The
default formatter does not adjust the representation of missing values unless
the ``na_rep`` argument is used.
the ``na_rep`` argument is used. The default formatter will alter the display
of ``decimal`` and ``thousands`` separators only if the respective arguments
are used.

The ``subset`` argument defines which region to apply the formatting function
to. If the ``formatter`` argument is given in dict form but does not include
Expand Down Expand Up @@ -662,13 +695,28 @@ def format(
<td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
<td .. >NA</td>
...

Using a given ``formatter`` and the default formatter with ``decimal`` and
``thousands``.

>>> df = pd.DataFrame([[1000, 10000], [100.123, 1000000]])
>>> s = df.style.format(precision=2, thousands="-", decimal="*")
0 1
0 1-000*00 10-000
1 100*12 1-000-000
>>> s = df.style.format({0: "{:,.1f}", 1: "{:.0f}"}, thousands=" ", decimal=",")
0 1
0 1 000,0 10000
1 100,1 1000000
"""
if all(
(
formatter is None,
subset is None,
precision is None,
na_rep is None,
decimal == ".",
thousands is None,
escape is False,
)
):
Expand All @@ -689,7 +737,12 @@ def format(
except KeyError:
format_func = None
format_func = _maybe_wrap_formatter(
format_func, na_rep=na_rep, precision=precision, escape=escape
format_func,
na_rep=na_rep,
precision=precision,
escape=escape,
decimal=decimal,
thousands=thousands,
)

for row, value in data[[col]].itertuples():
Expand Down Expand Up @@ -2201,7 +2254,7 @@ def _get_level_lengths(index, hidden_elements=None):
return non_zero_lengths


def _default_formatter(x: Any, precision: int) -> Any:
def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any:
"""
Format the display of a value

Expand All @@ -2211,21 +2264,60 @@ def _default_formatter(x: Any, precision: int) -> Any:
Input variable to be formatted
precision : Int
Floating point precision used if ``x`` is float or complex.
thousands : bool
Whether to group digits with thousands separated with ",".

Returns
-------
value : Any
Matches input type, or string if input is float or complex.
Matches input type, or string if input is float or complex or int with sep.
"""
if isinstance(x, (float, complex)):
if thousands:
return f"{x:,.{precision}f}"
return f"{x:.{precision}f}"
elif isinstance(x, int) and thousands:
return f"{x:,.0f}"
return x


def _maybe_wrap_deci_thou(
formatter: Callable, decimal: str = ".", thousands: Optional[str] = None
) -> Callable:
"""
Takes a string formatting function and wraps logic to deal with thousands and
decimal parameters, in the case that they are non-standard and that the input
is a (float, complex, int).
"""

def wrapper(x):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we share code with FloatArrayFormatter in pandas/io/format.py ?

may need to refactor to do that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

taking a quick look:

  • there doesn't seem to be an inherent option for thousands, just decimal replacement in that code.
  • the FloatArrayFormatter is vectorised but Styler does display_funcs element-wise (which it can't avoid for processing individual cell styles). I guess this makes it very difficult to refactor across modules (unless the format_class was called element-wise)
  • other parameters such as escape and subset would have to be wrapped around the format.py code, so it might end up equally complicated anyway?

will have a think over next day.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback I think the refactor is too complicated at this stage since:

  • FloatArrayFormatter doesn't have the thousands argument which is main part of this PR.
  • the above problem about column vectorisation versus element-wise (and the styler subset argument)
  • the other arguments make it difficult to exactly replicate existing behaviour.

default_thousands = thousands is None or thousands == ","
default_decimal = decimal == "."
if isinstance(x, (float, complex, int)):
if default_thousands and default_decimal:
return formatter(x)
elif default_thousands and not default_decimal:
return formatter(x).replace(".", decimal)
elif not default_thousands and default_decimal:
return formatter(x).replace(",", thousands)
else:
return (
formatter(x)
.replace(",", "§_§-") # rare string to avoid "," <-> "." clash.
.replace(".", decimal)
.replace("§_§-", thousands)
)
return formatter(x)

return wrapper


def _maybe_wrap_formatter(
formatter: Optional[BaseFormatter] = None,
na_rep: Optional[str] = None,
precision: Optional[int] = None,
decimal: str = ".",
thousands: Optional[str] = None,
escape: bool = False,
) -> Callable:
"""
Expand All @@ -2234,15 +2326,19 @@ def _maybe_wrap_formatter(
available.
"""
if isinstance(formatter, str):
formatter_func = lambda x: formatter.format(x)
func = lambda x: formatter.format(x)
elif callable(formatter):
formatter_func = formatter
func = formatter
elif formatter is None:
precision = get_option("display.precision") if precision is None else precision
formatter_func = partial(_default_formatter, precision=precision)
func = partial(
_default_formatter, precision=precision, thousands=(thousands is not None)
)
else:
raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")

formatter_func = _maybe_wrap_deci_thou(func, decimal=decimal, thousands=thousands)

def _str_escape(x, escape: bool):
"""if escaping: only use on str, else return input"""
if escape and isinstance(x, str):
Expand Down
38 changes: 38 additions & 0 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,44 @@ def test_format_escape_floats(self):
for expected in [">0<", ">1<", ">-1.2<", ">-0.6<"]:
assert expected in s.render()

@pytest.mark.parametrize("formatter", [None, "{:,.1f}"])
@pytest.mark.parametrize("decimal", [".", "*"])
@pytest.mark.parametrize("precision", [None, 2])
def test_format_thousands(self, formatter, decimal, precision):
    """A custom ``thousands`` separator groups digits for float, int and complex."""
    format_kwargs = {
        "thousands": "_",
        "formatter": formatter,
        "decimal": decimal,
        "precision": precision,
    }
    # One single-cell frame per numeric kind, in order: float, int, complex.
    for value in (1000000.123456789, 1000000, 1 + 1000000.123456789j):
        styler = DataFrame([[value]]).style
        ctx = styler.format(**format_kwargs)._translate()
        assert "1_000_000" in ctx["body"][0][1]["display_value"]

@pytest.mark.parametrize("formatter", [None, "{:,.4f}"])
@pytest.mark.parametrize("thousands", [None, ",", "*"])
@pytest.mark.parametrize("precision", [None, 4])
def test_format_decimal(self, formatter, thousands, precision):
    """A custom ``decimal`` separator replaces the point for float and complex."""
    format_kwargs = {
        "decimal": "_",
        "formatter": formatter,
        "thousands": thousands,
        "precision": precision,
    }
    # One single-cell frame per numeric kind, in order: float, complex.
    for value in (1000000.123456789, 1 + 1000000.123456789j):
        styler = DataFrame([[value]]).style
        ctx = styler.format(**format_kwargs)._translate()
        assert "000_123" in ctx["body"][0][1]["display_value"]

def test_nonunique_raises(self):
df = DataFrame([[1, 2]], columns=["A", "A"])
msg = "style is not supported for non-unique indices."
Expand Down