Skip to content

ENH: Styler.bar accepts matplotlib colormap #43662

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Oct 10, 2021
13 changes: 8 additions & 5 deletions doc/source/user_guide/style.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib as mpl\n",
"\n",
"df = pd.DataFrame([[38.0, 2.0, 18.0, 22.0, 21, np.nan],[19, 439, 6, 452, 226,232]], \n",
" index=pd.Index(['Tumour (Positive)', 'Non-Tumour (Negative)'], name='Actual Label:'), \n",
Expand Down Expand Up @@ -1275,9 +1276,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Additional keyword arguments give more control on centering and positioning, and you can pass a list of `[color_negative, color_positive]` to highlight lower and higher values.\n",
"Additional keyword arguments give more control on centering and positioning, and you can pass a list of `[color_negative, color_positive]` to highlight lower and higher values or a matplotlib colormap.\n",
"\n",
"Here's how you can change the above with the new `align` option, combined with setting `vmin` and `vmax` limits, the `width` of the figure, and underlying css `props` of cells, leaving space to display the text and the bars:"
"To showcase an example here's how you can change the above with the new `align` option, combined with setting `vmin` and `vmax` limits, the `width` of the figure, and underlying css `props` of cells, leaving space to display the text and the bars. We also use `text_gradient` to color the text the same as the bars using a matplotlib colormap (although in this case the visualization is probably better without this additional effect)."
]
},
{
Expand All @@ -1286,8 +1287,10 @@
"metadata": {},
"outputs": [],
"source": [
"df2.style.bar(align=0, vmin=-2.5, vmax=2.5, color=['#d65f5f', '#5fba7d'], height=50,\n",
" width=60, props=\"width: 120px; border-right: 1px solid black;\").format('{:.3f}', na_rep=\"\")"
"df2.style.format('{:.3f}', na_rep=\"\")\\\n",
" .bar(align=0, vmin=-2.5, vmax=2.5, color=mpl.cm.get_cmap(\"bwr\"), height=50,\n",
" width=60, props=\"width: 120px; border-right: 1px solid black;\")\\\n",
" .text_gradient(cmap=\"bwr\", vmin=-2.5, vmax=2.5)"
]
},
{
Expand Down Expand Up @@ -2031,7 +2034,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.7"
"version": "3.8.6"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ Styler
:class:`.Styler` has been further developed in 1.4.0. The following enhancements have been made:

- Styling and formatting of indexes has been added, with :meth:`.Styler.apply_index`, :meth:`.Styler.applymap_index` and :meth:`.Styler.format_index`. These mirror the signature of the methods already used to style and format data values, and work with both HTML and LaTeX format (:issue:`41893`, :issue:`43101`).
- :meth:`.Styler.bar` introduces additional arguments to control alignment and display (:issue:`26070`, :issue:`36419`), and it also validates the input arguments ``width`` and ``height`` (:issue:`42511`).
- :meth:`.Styler.bar` introduces additional arguments to control alignment, display and colors (:issue:`26070`, :issue:`36419`, :issue:`43662`), and it also validates the input arguments ``width`` and ``height`` (:issue:`42511`).
- :meth:`.Styler.to_latex` introduces keyword argument ``environment``, which also allows a specific "longtable" entry through a separate jinja2 template (:issue:`41866`).
- :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`).
- Keyword arguments ``level`` and ``names`` added to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for additional control of visibility of MultiIndexes and index names (:issue:`25475`, :issue:`43404`, :issue:`43346`)
Expand Down
106 changes: 77 additions & 29 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
IndexSlice,
RangeIndex,
)
from pandas.api.types import is_list_like
from pandas.core import generic
import pandas.core.common as com
from pandas.core.frame import (
Expand All @@ -60,7 +59,7 @@
)

try:
from matplotlib import colors
import matplotlib as mpl
import matplotlib.pyplot as plt

has_mpl = True
Expand All @@ -72,7 +71,7 @@
@contextmanager
def _mpl(func: Callable):
if has_mpl:
yield plt, colors
yield plt, mpl
else:
raise ImportError(no_mpl_message.format(func.__name__))

Expand Down Expand Up @@ -2608,7 +2607,8 @@ def bar(
subset: Subset | None = None,
axis: Axis | None = 0,
*,
color="#d65f5f",
color: str | list | tuple | None = None,
cmap: Any | None = None,
width: float = 100,
height: float = 100,
align: str | float | int | Callable = "mid",
Expand Down Expand Up @@ -2636,6 +2636,11 @@ def bar(
negative and positive numbers. If 2-tuple/list is used, the
first element is the color_negative and the second is the
color_positive (eg: ['#d65f5f', '#5fba7d']).
cmap : str, matplotlib.cm.ColorMap
A string name of a matplotlib Colormap, or a Colormap object. Cannot be
used together with ``color``.

.. versionadded:: 1.4.0
width : float, default 100
The percentage of the cell, measured from the left, in which to draw the
bars, in [0, 100].
Expand Down Expand Up @@ -2678,17 +2683,25 @@ def bar(
Returns
-------
self : Styler
"""
if not (is_list_like(color)):
color = [color, color]
elif len(color) == 1:
color = [color[0], color[0]]
elif len(color) > 2:
raise ValueError(
"`color` must be string or a list-like "
"of length 2: [`color_neg`, `color_pos`] "
"(eg: color=['#d65f5f', '#5fba7d'])"
)

Notes
-----
This section of the user guide:
`Table Visualization <../../user_guide/style.ipynb>`_ gives
a number of examples for different settings and color coordination.
"""
if color is None and cmap is None:
color = "#d65f5f"
elif color is not None and cmap is not None:
raise ValueError("`color` and `cmap` cannot both be given")
elif color is not None:
if (isinstance(color, (list, tuple)) and len(color) > 2) or not isinstance(
color, (str, list, tuple)
):
raise ValueError(
"`color` must be string or list or tuple of 2 strings,"
"(eg: color=['#d65f5f', '#5fba7d'])"
)

if not (0 <= width <= 100):
raise ValueError(f"`width` must be a value in [0, 100], got {width}")
Expand All @@ -2704,6 +2717,7 @@ def bar(
axis=axis,
align=align,
colors=color,
cmap=cmap,
width=width / 100,
height=height / 100,
vmin=vmin,
Expand Down Expand Up @@ -3260,12 +3274,12 @@ def _background_gradient(
else: # else validate gmap against the underlying data
gmap = _validate_apply_axis_arg(gmap, "gmap", float, data)

with _mpl(Styler.background_gradient) as (plt, colors):
with _mpl(Styler.background_gradient) as (plt, mpl):
smin = np.nanmin(gmap) if vmin is None else vmin
smax = np.nanmax(gmap) if vmax is None else vmax
rng = smax - smin
# extend lower / upper bounds, compresses color range
norm = colors.Normalize(smin - (rng * low), smax + (rng * high))
norm = mpl.colors.Normalize(smin - (rng * low), smax + (rng * high))
rgbas = plt.cm.get_cmap(cmap)(norm(gmap))

def relative_luminance(rgba) -> float:
Expand Down Expand Up @@ -3294,9 +3308,11 @@ def css(rgba, text_only) -> str:
if not text_only:
dark = relative_luminance(rgba) < text_color_threshold
text_color = "#f1f1f1" if dark else "#000000"
return f"background-color: {colors.rgb2hex(rgba)};color: {text_color};"
return (
f"background-color: {mpl.colors.rgb2hex(rgba)};color: {text_color};"
)
else:
return f"color: {colors.rgb2hex(rgba)};"
return f"color: {mpl.colors.rgb2hex(rgba)};"

if data.ndim == 1:
return [css(rgba, text_only) for rgba in rgbas]
Expand Down Expand Up @@ -3369,7 +3385,8 @@ def _highlight_value(data: DataFrame | Series, op: str, props: str) -> np.ndarra
def _bar(
data: NDFrame,
align: str | float | int | Callable,
colors: list[str],
colors: str | list | tuple,
cmap: Any,
width: float,
height: float,
vmin: float | None,
Expand Down Expand Up @@ -3431,7 +3448,7 @@ def css_bar(start: float, end: float, color: str) -> str:
cell_css += f" {color} {end*100:.1f}%, transparent {end*100:.1f}%)"
return cell_css

def css_calc(x, left: float, right: float, align: str):
def css_calc(x, left: float, right: float, align: str, color: str | list | tuple):
"""
Return the correct CSS for bar placement based on calculated values.

Expand Down Expand Up @@ -3462,7 +3479,10 @@ def css_calc(x, left: float, right: float, align: str):
if pd.isna(x):
return base_css

color = colors[0] if x < 0 else colors[1]
if isinstance(color, (list, tuple)):
color = color[0] if x < 0 else color[1]
assert isinstance(color, str) # mypy redefinition

x = left if x < left else x
x = right if x > right else x # trim data if outside of the window

Expand Down Expand Up @@ -3525,15 +3545,43 @@ def css_calc(x, left: float, right: float, align: str):
"value defining the center line or a callable that returns a float"
)

rgbas = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't we have a similar utility in the plotting routines?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

had a quick look, this is converting mpl colors to hex codes, which is not the usual direction for plotting.

ignoring the arg validation, though which is specific to the method, this op is just a single loop comprehension: [mpl_colors.rgb2hex(rgba) for rgba in rgbas]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

an alternative is to add a cmap arg, which accepts a string or mpl ColorMap, similar to background_gradient and text_gradient.

Advantage is: i) consistency ii) autoloading a colormap from string rather than user have to create a Colormap instance.

Disadvantage is: having both a color and a cmap arg, which is slightly confusing, although that can be documented away.

?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm i do like the consistency of i), can we kill the color arg (may need to deprecate)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really, because you need to differentiate between a color which is a valid css str, e.g. "#ee3ee" or "rgb(10,200,10)" or "salmon", and a string which is a matplotlib colormap, i.e. "inferno" or "PuBu".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have this color, cmap api anywhere else?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The following use color: highlight_min/max/between/quantile/null, and these use cmap: text/background_gradient. But the use is specific where either a specific color or a color gradient is useful.
For bar either is useful.

Matplotlib also separates the arguments on some functions, e.g. scatter, where it has a c, color and cmap argument: https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.scatter.html?highlight=scatter#matplotlib.axes.Axes.scatter

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hah, mpl api is not great here

if cmap is not None:
# use the matplotlib colormap input
with _mpl(Styler.bar) as (plt, mpl):
cmap = (
mpl.cm.get_cmap(cmap)
if isinstance(cmap, str)
else cmap # assumed to be a Colormap instance as documented
)
norm = mpl.colors.Normalize(left, right)
rgbas = cmap(norm(values))
if data.ndim == 1:
rgbas = [mpl.colors.rgb2hex(rgba) for rgba in rgbas]
else:
rgbas = [[mpl.colors.rgb2hex(rgba) for rgba in row] for row in rgbas]

assert isinstance(align, str) # mypy: should now be in [left, right, mid, zero]
if data.ndim == 1:
return [css_calc(x - z, left - z, right - z, align) for x in values]
return [
css_calc(
x - z, left - z, right - z, align, colors if rgbas is None else rgbas[i]
)
for i, x in enumerate(values)
]
else:
return DataFrame(
return np.array(
[
[css_calc(x - z, left - z, right - z, align) for x in row]
for row in values
],
index=data.index,
columns=data.columns,
[
css_calc(
x - z,
left - z,
right - z,
align,
colors if rgbas is None else rgbas[i][j],
)
for j, x in enumerate(row)
]
for i, row in enumerate(values)
]
)
28 changes: 28 additions & 0 deletions pandas/tests/io/formats/style/test_matplotlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
pytest.importorskip("matplotlib")
pytest.importorskip("jinja2")

import matplotlib as mpl

from pandas.io.formats.style import Styler


Expand Down Expand Up @@ -256,3 +258,29 @@ def test_background_gradient_gmap_wrong_series(styler_blank):
gmap = Series([1, 2], index=["X", "Y"])
with pytest.raises(ValueError, match=msg):
styler_blank.background_gradient(gmap=gmap, axis=None)._compute()


@pytest.mark.parametrize("cmap", ["PuBu", mpl.cm.get_cmap("PuBu")])
def test_bar_colormap(cmap):
    """
    Check that ``Styler.bar`` draws each cell's bar in its ``PuBu`` color.

    The colormap is supplied once as a string name and once as the object
    returned by ``mpl.cm.get_cmap``; both must yield the same hex codes.
    """
    frame = DataFrame([[1, 2], [3, 4]])
    computed_ctx = frame.style.bar(cmap=cmap, axis=None)._compute().ctx

    # expected hex code per (row, column) position when the whole frame
    # is normalised together (axis=None)
    expected_hex = {
        (0, 0): "#d0d1e6",
        (1, 0): "#056faf",
        (0, 1): "#73a9cf",
        (1, 1): "#023858",
    }
    for position, hex_code in expected_hex.items():
        # second element of the cell's second ctx entry; presumably the CSS
        # value carrying the bar gradient
        assert hex_code in computed_ctx[position][1][1]


def test_bar_color_raises(df):
    """
    Check that ``Styler.bar`` rejects invalid ``color`` / ``cmap`` input.

    A set and a list of three colors must both raise the ``color`` type
    error, and passing ``color`` together with ``cmap`` must raise as well.
    """
    invalid_color_msg = "`color` must be string or list or tuple of 2 strings"
    for bad_color in ({"a", "b"}, ["a", "b", "c"]):
        with pytest.raises(ValueError, match=invalid_color_msg):
            df.style.bar(color=bad_color).to_html()

    both_given_msg = "`color` and `cmap` cannot both be given"
    with pytest.raises(ValueError, match=both_given_msg):
        df.style.bar(color="something", cmap="something else").to_html()