diff --git a/doc/source/_static/style/bg_ax0.png b/doc/source/_static/style/bg_ax0.png new file mode 100644 index 0000000000000..1767d34136a02 Binary files /dev/null and b/doc/source/_static/style/bg_ax0.png differ diff --git a/doc/source/_static/style/bg_axNone.png b/doc/source/_static/style/bg_axNone.png new file mode 100644 index 0000000000000..8882c6f689773 Binary files /dev/null and b/doc/source/_static/style/bg_axNone.png differ diff --git a/doc/source/_static/style/bg_axNone_gmap.png b/doc/source/_static/style/bg_axNone_gmap.png new file mode 100644 index 0000000000000..bdd2b55e8c6b4 Binary files /dev/null and b/doc/source/_static/style/bg_axNone_gmap.png differ diff --git a/doc/source/_static/style/bg_axNone_lowhigh.png b/doc/source/_static/style/bg_axNone_lowhigh.png new file mode 100644 index 0000000000000..c37a707e73692 Binary files /dev/null and b/doc/source/_static/style/bg_axNone_lowhigh.png differ diff --git a/doc/source/_static/style/bg_axNone_vminvmax.png b/doc/source/_static/style/bg_axNone_vminvmax.png new file mode 100644 index 0000000000000..4ca958de15ec3 Binary files /dev/null and b/doc/source/_static/style/bg_axNone_vminvmax.png differ diff --git a/doc/source/_static/style/bg_gmap.png b/doc/source/_static/style/bg_gmap.png new file mode 100644 index 0000000000000..039ff6b78958e Binary files /dev/null and b/doc/source/_static/style/bg_gmap.png differ diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5e95cd6e5ee10..3a7f938f70338 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -186,6 +186,7 @@ Other enhancements - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. 
in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) - :meth:`.Styler.format` adds keyword argument ``escape`` for optional HTML escaping (:issue:`40437`) +- :meth:`.Styler.background_gradient` now allows the ability to supply a specific gradient map (:issue:`22727`) - :meth:`.Styler.clear` now clears :attr:`Styler.hidden_index` and :attr:`Styler.hidden_columns` as well (:issue:`40484`) - Builtin highlighting methods in :class:`Styler` have a more consistent signature and css customisability (:issue:`40242`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index ba17e44fc66e0..267606461f003 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -43,7 +43,10 @@ from pandas.api.types import is_list_like from pandas.core import generic import pandas.core.common as com -from pandas.core.frame import DataFrame +from pandas.core.frame import ( + DataFrame, + Series, +) from pandas.core.generic import NDFrame from pandas.core.indexes.api import Index @@ -179,7 +182,7 @@ def __init__( escape: bool = False, ): # validate ordered args - if isinstance(data, pd.Series): + if isinstance(data, Series): data = data.to_frame() if not isinstance(data, DataFrame): raise TypeError("``data`` must be a Series or DataFrame") @@ -1438,21 +1441,27 @@ def background_gradient( text_color_threshold: float = 0.408, vmin: float | None = None, vmax: float | None = None, + gmap: Sequence | None = None, ) -> Styler: """ Color the background in a gradient style. The background color is determined according - to the data in each column (optionally row). Requires matplotlib. + to the data in each column, row or frame, or by a given + gradient map. 
Requires matplotlib. Parameters ---------- cmap : str or colormap Matplotlib colormap. low : float - Compress the range by the low. + Compress the color range at the low end. This is a multiple of the data + range to extend below the minimum; good values usually in [0, 1], + defaults to 0. high : float - Compress the range by the high. + Compress the color range at the high end. This is a multiple of the data + range to extend above the maximum; good values usually in [0, 1], + defaults to 0. axis : {0 or 'index', 1 or 'columns', None}, default 0 Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once @@ -1460,45 +1469,108 @@ def background_gradient( subset : IndexSlice A valid slice for ``data`` to limit the style application to. text_color_threshold : float or int - Luminance threshold for determining text color. Facilitates text - visibility across varying background colors. From 0 to 1. - 0 = all text is dark colored, 1 = all text is light colored. + Luminance threshold for determining text color in [0, 1]. Facilitates text + visibility across varying background colors. All text is dark if 0, and + light if 1, defaults to 0.408. .. versionadded:: 0.24.0 vmin : float, optional Minimum data value that corresponds to colormap minimum value. - When None (default): the minimum value of the data will be used. + If not specified the minimum value of the data (or gmap) will be used. .. versionadded:: 1.0.0 vmax : float, optional Maximum data value that corresponds to colormap maximum value. - When None (default): the maximum value of the data will be used. + If not specified the maximum value of the data (or gmap) will be used. .. versionadded:: 1.0.0 + gmap : array-like, optional + Gradient map for determining the background colors. If not supplied + will use the underlying data from rows, columns or frame. 
If given as an + ndarray or list-like must be an identical shape to the underlying data + considering ``axis`` and ``subset``. If given as DataFrame or Series must + have same index and column labels considering ``axis`` and ``subset``. + If supplied, ``vmin`` and ``vmax`` should be given relative to this + gradient map. + + .. versionadded:: 1.3.0 + Returns ------- self : Styler - Raises - ------ - ValueError - If ``text_color_threshold`` is not a value from 0 to 1. - Notes ----- - Set ``text_color_threshold`` or tune ``low`` and ``high`` to keep the - text legible by not using the entire range of the color map. The range - of the data is extended by ``low * (x.max() - x.min())`` and ``high * - (x.max() - x.min())`` before normalizing. + When using ``low`` and ``high`` the range + of the gradient, given by the data if ``gmap`` is not given or by ``gmap``, + is extended at the low end effectively by + `map.min - low * map.range` and at the high end by + `map.max + high * map.range` before the colors are normalized and determined. + + If combining with ``vmin`` and ``vmax`` the `map.min`, `map.max` and + `map.range` are replaced by values according to the values derived from + ``vmin`` and ``vmax``. + + This method will preselect numeric columns and ignore non-numeric columns + unless a ``gmap`` is supplied in which case no preselection occurs. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'City': ['Stockholm', 'Oslo', 'Copenhagen'], + ... 'Temp (c)': [21.6, 22.4, 24.5], + ... 'Rain (mm)': [5.0, 13.3, 0.0], + ... 'Wind (m/s)': [3.2, 3.1, 6.7] + ... }) + + Shading the values column-wise, with ``axis=0``, preselecting numeric columns + + >>> df.style.background_gradient(axis=0) + + .. figure:: ../../_static/style/bg_ax0.png + + Shading all values collectively using ``axis=None`` + + >>> df.style.background_gradient(axis=None) + + .. 
figure:: ../../_static/style/bg_axNone.png + + Compress the color map from both the ``low`` and ``high`` ends + + >>> df.style.background_gradient(axis=None, low=0.75, high=1.0) + + .. figure:: ../../_static/style/bg_axNone_lowhigh.png + + Manually setting ``vmin`` and ``vmax`` gradient thresholds + + >>> df.style.background_gradient(axis=None, vmin=6.7, vmax=21.6) + + .. figure:: ../../_static/style/bg_axNone_vminvmax.png + + Setting a ``gmap`` and applying to all columns with another ``cmap`` + + >>> df.style.background_gradient(axis=0, gmap=df['Temp (c)'], cmap='YlOrRd') + + .. figure:: ../../_static/style/bg_gmap.png + + Setting the gradient map for a dataframe (i.e. ``axis=None``), we need to + explicitly state ``subset`` to match the ``gmap`` shape + + >>> gmap = np.array([[1,2,3], [2,3,4], [3,4,5]]) + >>> df.style.background_gradient(axis=None, gmap=gmap, + ... cmap='YlOrRd', subset=['Temp (c)', 'Rain (mm)', 'Wind (m/s)'] + ... ) + + .. figure:: ../../_static/style/bg_axNone_gmap.png """ - if subset is None: + if subset is None and gmap is None: subset = self.data.select_dtypes(include=np.number).columns self.apply( - self._background_gradient, + _background_gradient, cmap=cmap, subset=subset, axis=axis, @@ -1507,75 +1579,10 @@ def background_gradient( text_color_threshold=text_color_threshold, vmin=vmin, vmax=vmax, + gmap=gmap, ) return self - @staticmethod - def _background_gradient( - s, - cmap="PuBu", - low: float = 0, - high: float = 0, - text_color_threshold: float = 0.408, - vmin: float | None = None, - vmax: float | None = None, - ): - """ - Color background in a range according to the data. - """ - if ( - not isinstance(text_color_threshold, (float, int)) - or not 0 <= text_color_threshold <= 1 - ): - msg = "`text_color_threshold` must be a value from 0 to 1." 
- raise ValueError(msg) - - with _mpl(Styler.background_gradient) as (plt, colors): - smin = np.nanmin(s.to_numpy()) if vmin is None else vmin - smax = np.nanmax(s.to_numpy()) if vmax is None else vmax - rng = smax - smin - # extend lower / upper bounds, compresses color range - norm = colors.Normalize(smin - (rng * low), smax + (rng * high)) - # matplotlib colors.Normalize modifies inplace? - # https://github.com/matplotlib/matplotlib/issues/5427 - rgbas = plt.cm.get_cmap(cmap)(norm(s.to_numpy(dtype=float))) - - def relative_luminance(rgba) -> float: - """ - Calculate relative luminance of a color. - - The calculation adheres to the W3C standards - (https://www.w3.org/WAI/GL/wiki/Relative_luminance) - - Parameters - ---------- - color : rgb or rgba tuple - - Returns - ------- - float - The relative luminance as a value from 0 to 1 - """ - r, g, b = ( - x / 12.92 if x <= 0.04045 else ((x + 0.055) / 1.055) ** 2.4 - for x in rgba[:3] - ) - return 0.2126 * r + 0.7152 * g + 0.0722 * b - - def css(rgba) -> str: - dark = relative_luminance(rgba) < text_color_threshold - text_color = "#f1f1f1" if dark else "#000000" - return f"background-color: {colors.rgb2hex(rgba)};color: {text_color};" - - if s.ndim == 1: - return [css(rgba) for rgba in rgbas] - else: - return DataFrame( - [[css(rgba) for rgba in row] for row in rgbas], - index=s.index, - columns=s.columns, - ) - def set_properties(self, subset=None, **kwargs) -> Styler: """ Set defined CSS-properties to each ```` HTML element within the given @@ -2346,3 +2353,119 @@ def pred(part) -> bool: else: slice_ = [part if pred(part) else [part] for part in slice_] return tuple(slice_) + + +def _validate_apply_axis_arg( + arg: FrameOrSeries | Sequence | np.ndarray, + arg_name: str, + dtype: Any | None, + data: FrameOrSeries, +) -> np.ndarray: + """ + For the apply-type methods, ``axis=None`` creates ``data`` as DataFrame, and for + ``axis=[1,0]`` it creates a Series. 
Where ``arg`` is expected as an element + of some operator with ``data`` we must make sure that the two are compatible shapes, + or raise. + + Parameters + ---------- + arg : sequence, Series or DataFrame + the user input arg + arg_name : string + name of the arg for use in error messages + dtype : numpy dtype, optional + forced numpy dtype if given + data : Series or DataFrame + underlying subset of Styler data on which operations are performed + + Returns + ------- + ndarray + """ + dtype = {"dtype": dtype} if dtype else {} + # raise if input is wrong for axis: + if isinstance(arg, Series) and isinstance(data, DataFrame): + raise ValueError( + f"'{arg_name}' is a Series but underlying data for operations " + f"is a DataFrame since 'axis=None'" + ) + elif isinstance(arg, DataFrame) and isinstance(data, Series): + raise ValueError( + f"'{arg_name}' is a DataFrame but underlying data for " + f"operations is a Series with 'axis in [0,1]'" + ) + elif isinstance(arg, (Series, DataFrame)): # align indx / cols to data + arg = arg.reindex_like(data, method=None).to_numpy(**dtype) + else: + arg = np.asarray(arg, **dtype) + assert isinstance(arg, np.ndarray) # mypy requirement + if arg.shape != data.shape: # check valid input + raise ValueError( + f"supplied '{arg_name}' is not correct shape for data over " + f"selected 'axis': got {arg.shape}, " + f"expected {data.shape}" + ) + return arg + + +def _background_gradient( + data, + cmap="PuBu", + low: float = 0, + high: float = 0, + text_color_threshold: float = 0.408, + vmin: float | None = None, + vmax: float | None = None, + gmap: Sequence | np.ndarray | FrameOrSeries | None = None, +): + """ + Color background in a range according to the data or a gradient map + """ + if gmap is None: # the data is used as the gmap + gmap = data.to_numpy(dtype=float) + else: # else validate gmap against the underlying data + gmap = _validate_apply_axis_arg(gmap, "gmap", float, data) + + with _mpl(Styler.background_gradient) as (plt, colors): 
+ smin = np.nanmin(gmap) if vmin is None else vmin + smax = np.nanmax(gmap) if vmax is None else vmax + rng = smax - smin + # extend lower / upper bounds, compresses color range + norm = colors.Normalize(smin - (rng * low), smax + (rng * high)) + rgbas = plt.cm.get_cmap(cmap)(norm(gmap)) + + def relative_luminance(rgba) -> float: + """ + Calculate relative luminance of a color. + + The calculation adheres to the W3C standards + (https://www.w3.org/WAI/GL/wiki/Relative_luminance) + + Parameters + ---------- + color : rgb or rgba tuple + + Returns + ------- + float + The relative luminance as a value from 0 to 1 + """ + r, g, b = ( + x / 12.92 if x <= 0.04045 else ((x + 0.055) / 1.055) ** 2.4 + for x in rgba[:3] + ) + return 0.2126 * r + 0.7152 * g + 0.0722 * b + + def css(rgba) -> str: + dark = relative_luminance(rgba) < text_color_threshold + text_color = "#f1f1f1" if dark else "#000000" + return f"background-color: {colors.rgb2hex(rgba)};color: {text_color};" + + if data.ndim == 1: + return [css(rgba) for rgba in rgbas] + else: + return DataFrame( + [[css(rgba) for rgba in row] for row in rgbas], + index=data.index, + columns=data.columns, + ) diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index f01e818e40b22..f0158711664ce 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -57,15 +57,6 @@ def test_text_color_threshold(self, cmap, expected): for k in expected.keys(): assert result[k] == expected[k] - @pytest.mark.parametrize("text_color_threshold", [1.1, "1", -1, [2, 2]]) - def test_text_color_threshold_raises(self, text_color_threshold): - df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) - msg = "`text_color_threshold` must be a value from 0 to 1." 
- with pytest.raises(ValueError, match=msg): - df.style.background_gradient( - text_color_threshold=text_color_threshold - )._compute() - def test_background_gradient_axis(self): df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) @@ -106,3 +97,131 @@ def test_background_gradient_int64(self): assert ctx2[(0, 0)] == ctx1[(0, 0)] assert ctx2[(1, 0)] == ctx1[(1, 0)] assert ctx2[(2, 0)] == ctx1[(2, 0)] + + @pytest.mark.parametrize( + "axis, gmap, expected", + [ + ( + 0, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + 1, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + None, + np.array([[2, 1], [1, 2]]), + { + (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ], + ) + def test_background_gradient_gmap_array(self, axis, gmap, expected): + # tests when gmap is given as a sequence and converted to ndarray + df = DataFrame([[0, 0], [0, 0]]) + result = df.style.background_gradient(axis=axis, gmap=gmap)._compute().ctx + assert result == expected + + @pytest.mark.parametrize( + "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)] + ) + def test_background_gradient_gmap_array_raises(self, gmap, axis): + # test when gmap as converted ndarray is bad shape + df = DataFrame([[0, 0, 0], [0, 0, 0]]) + msg = "supplied 'gmap' is not correct 
shape" + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=axis)._compute() + + @pytest.mark.parametrize( + "gmap", + [ + DataFrame( # reverse the columns + [[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"] + ), + DataFrame( # reverse the index + [[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"] + ), + DataFrame( # reverse the index and columns + [[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"] + ), + DataFrame( # add unnecessary columns + [[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"] + ), + DataFrame( # add unnecessary index + [[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"] + ), + ], + ) + @pytest.mark.parametrize( + "subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to + [ + (None, [[1, 2], [2, 1]]), + (["A"], [[1], [2]]), # slice only column "A" in data and gmap + (["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data + (IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap + (IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data + ], + ) + def test_background_gradient_gmap_dataframe_align(self, gmap, subset, exp_gmap): + # test gmap given as DataFrame that it aligns to the data including subset + df = DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + + expected = df.style.background_gradient(axis=None, gmap=exp_gmap, subset=subset) + result = df.style.background_gradient(axis=None, gmap=gmap, subset=subset) + assert expected._compute().ctx == result._compute().ctx + + @pytest.mark.parametrize( + "gmap, axis, exp_gmap", + [ + (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # reverse the index + (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # reverse the cols + (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx + (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col + ], + ) + def 
test_background_gradient_gmap_series_align(self, gmap, axis, exp_gmap): + # test gmap given as Series that it aligns to the data including subset + df = DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + + expected = df.style.background_gradient(axis=None, gmap=exp_gmap)._compute() + result = df.style.background_gradient(axis=axis, gmap=gmap)._compute() + assert expected.ctx == result.ctx + + @pytest.mark.parametrize( + "gmap, axis", + [ + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 1), + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0), + ], + ) + def test_background_gradient_gmap_wrong_dataframe(self, gmap, axis): + # test giving a gmap in DataFrame but with wrong axis + df = DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + msg = "'gmap' is a DataFrame but underlying data for operations is a Series" + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=axis)._compute() + + def test_background_gradient_gmap_wrong_series(self): + # test giving a gmap in Series form but with wrong axis + df = DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + msg = "'gmap' is a Series but underlying data for operations is a DataFrame" + gmap = Series([1, 2], index=["X", "Y"]) + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=None)._compute()