From c31b0b2fe99ffff560c74a7f7670dccdcca6febe Mon Sep 17 00:00:00 2001 From: bherwerth <108834862+bherwerth@users.noreply.github.com> Date: Mon, 25 Jul 2022 11:58:24 +0000 Subject: [PATCH 1/5] TST: Add test for defaults arg 'subset' --- pandas/tests/io/formats/style/test_style.py | 66 +++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 23b05c8242274..52b5cbe400bca 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -1,4 +1,5 @@ import copy +from itertools import combinations import re from textwrap import dedent @@ -461,6 +462,31 @@ def test_apply_map_header_raises(mi_styler): mi_styler.applymap_index(lambda v: "attr: val;", axis="bad")._compute() +def _gen_test_cases_subset_defaults(df_test_subset_defaults, styled_cols_expected): + "generate list of test cases from all subsets of columns of `df`" + # Iterate over all possible column in df + for n_cols in range(1, df_test_subset_defaults.shape[1] + 1): + for input_cols in combinations(df_test_subset_defaults.columns, n_cols): + # Use 'isin' to work around the difficulty with indexing + # when a column label is boolean + df_test_case = df_test_subset_defaults.loc[ + :, df_test_subset_defaults.columns.isin(input_cols) + ] + styled_cols_expected_subset = list( + set(styled_cols_expected).intersection(df_test_case) + ) + + styled_elements_expected = { + (i, j) + for i in range(df_test_case.shape[0]) + for j in np.where( + df_test_case.columns.isin(styled_cols_expected_subset) + )[0] + } + + yield df_test_case, styled_elements_expected + + class TestStyler: def test_init_non_pandas(self): msg = "``data`` must be a Series or DataFrame" @@ -645,6 +671,46 @@ def test_apply_dataframe_return(self, index, columns): assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols assert (result[(0, 0)] == [("color", "red")]) is (index and columns) 
# (X,X) + @pytest.mark.parametrize( + # styled_elements_expected is a set of tuples (i, j) with integer indices, + # where we expect .ctx to be set after calling _compute on the styler + "df_test_case, styled_elements_expected", + ( + list( + _gen_test_cases_subset_defaults( + DataFrame( + { + True: [1, 2], + False: [3, 4], + "num_column": [5, 6], + "non_num_column": ["a", "b"], + } + ), + [True, False, "num_column"], + ) + ) + + list( + _gen_test_cases_subset_defaults( + DataFrame( + { + True: ["a", "b"], + False: [3, 4], + "num_column": [5, 6], + } + ), + [False, "num_column"], + ) + ) + ), + ) + @pytest.mark.parametrize( + "stylefunc", ["background_gradient", "bar", "text_gradient"] + ) + def test_subset_defaults(self, df_test_case, styled_elements_expected, stylefunc): + styled = getattr(df_test_case.style, stylefunc)() + styled._compute() + assert set(styled.ctx) == set(styled_elements_expected) + @pytest.mark.parametrize( "slice_", [ From c2f24ac83653c2712ae47c0509884b9c76983089 Mon Sep 17 00:00:00 2001 From: bherwerth <108834862+bherwerth@users.noreply.github.com> Date: Mon, 25 Jul 2022 12:10:01 +0000 Subject: [PATCH 2/5] BUG: Fix GH47838 by using boolean mask as default for 'subset' --- pandas/io/formats/style.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index fbee64771cd9a..93a6e62100b3e 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2977,6 +2977,9 @@ def hide( # A collection of "builtin" styles # ----------------------------------------------------------------------- + def _get_subset_default(self): + return self.data.columns.isin(self.data.select_dtypes(include=np.number)) + @doc( name="background", alt="text", @@ -3120,7 +3123,7 @@ def background_gradient( .. 
figure:: ../../_static/style/{image_prefix}_axNone_gmap.png """ if subset is None and gmap is None: - subset = self.data.select_dtypes(include=np.number).columns + subset = self._get_subset_default() self.apply( _background_gradient, @@ -3155,7 +3158,7 @@ def text_gradient( gmap: Sequence | None = None, ) -> Styler: if subset is None and gmap is None: - subset = self.data.select_dtypes(include=np.number).columns + subset = self._get_subset_default() return self.apply( _background_gradient, @@ -3308,7 +3311,7 @@ def bar( raise ValueError(f"`height` must be a value in [0, 100], got {height}") if subset is None: - subset = self.data.select_dtypes(include=np.number).columns + subset = self._get_subset_default() self.apply( _bar, From ed4e8d87b3d40d1a9a2398225870135e692f9d50 Mon Sep 17 00:00:00 2001 From: bherwerth <108834862+bherwerth@users.noreply.github.com> Date: Tue, 26 Jul 2022 11:54:01 +0000 Subject: [PATCH 3/5] TST: Reduce tests to one dataframe with boolean columns --- pandas/tests/io/formats/style/test_style.py | 82 ++++----------------- 1 file changed, 16 insertions(+), 66 deletions(-) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 52b5cbe400bca..192fec048a930 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -1,5 +1,4 @@ import copy -from itertools import combinations import re from textwrap import dedent @@ -462,31 +461,6 @@ def test_apply_map_header_raises(mi_styler): mi_styler.applymap_index(lambda v: "attr: val;", axis="bad")._compute() -def _gen_test_cases_subset_defaults(df_test_subset_defaults, styled_cols_expected): - "generate list of test cases from all subsets of columns of `df`" - # Iterate over all possible column in df - for n_cols in range(1, df_test_subset_defaults.shape[1] + 1): - for input_cols in combinations(df_test_subset_defaults.columns, n_cols): - # Use 'isin' to work around the difficulty with indexing - # when a 
column label is boolean - df_test_case = df_test_subset_defaults.loc[ - :, df_test_subset_defaults.columns.isin(input_cols) - ] - styled_cols_expected_subset = list( - set(styled_cols_expected).intersection(df_test_case) - ) - - styled_elements_expected = { - (i, j) - for i in range(df_test_case.shape[0]) - for j in np.where( - df_test_case.columns.isin(styled_cols_expected_subset) - )[0] - } - - yield df_test_case, styled_elements_expected - - class TestStyler: def test_init_non_pandas(self): msg = "``data`` must be a Series or DataFrame" @@ -671,46 +645,6 @@ def test_apply_dataframe_return(self, index, columns): assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols assert (result[(0, 0)] == [("color", "red")]) is (index and columns) # (X,X) - @pytest.mark.parametrize( - # styled_elements_expected is a set of tuples (i, j) with integer indices, - # where we expect .ctx to be set after calling _compute on the styler - "df_test_case, styled_elements_expected", - ( - list( - _gen_test_cases_subset_defaults( - DataFrame( - { - True: [1, 2], - False: [3, 4], - "num_column": [5, 6], - "non_num_column": ["a", "b"], - } - ), - [True, False, "num_column"], - ) - ) - + list( - _gen_test_cases_subset_defaults( - DataFrame( - { - True: ["a", "b"], - False: [3, 4], - "num_column": [5, 6], - } - ), - [False, "num_column"], - ) - ) - ), - ) - @pytest.mark.parametrize( - "stylefunc", ["background_gradient", "bar", "text_gradient"] - ) - def test_subset_defaults(self, df_test_case, styled_elements_expected, stylefunc): - styled = getattr(df_test_case.style, stylefunc)() - styled._compute() - assert set(styled.ctx) == set(styled_elements_expected) - @pytest.mark.parametrize( "slice_", [ @@ -812,6 +746,22 @@ def color_negative_red(val): df.loc[pct_subset] df.style.applymap(color_negative_red, subset=pct_subset) + @pytest.mark.parametrize( + "stylefunc", ["background_gradient", "bar", "text_gradient"] + ) + def test_subset_for_boolean_cols(self, stylefunc): + 
# GH47838 + df = DataFrame( + [ + [1, 2], + [3, 4], + ], + columns=[False, True], + ) + styled = getattr(df.style, stylefunc)() + styled._compute() + assert set(styled.ctx) == {(0, 0), (0, 1), (1, 0), (1, 1)} + def test_empty(self): df = DataFrame({"A": [1, 0]}) s = df.style From c51d7ea1ec49c6baa4cf90cb17a0457b2c80dd1d Mon Sep 17 00:00:00 2001 From: bherwerth <108834862+bherwerth@users.noreply.github.com> Date: Tue, 26 Jul 2022 12:04:21 +0000 Subject: [PATCH 4/5] CLN: Rename method and add comment --- pandas/io/formats/style.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 93a6e62100b3e..f19c4d04f059e 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2977,7 +2977,10 @@ def hide( # A collection of "builtin" styles # ----------------------------------------------------------------------- - def _get_subset_default(self): + def _get_numeric_subset_default(self): + # Returns a boolean mask indicating where `self.data` has numerical columns. + # Choosing a mask as opposed to the column names also works for + # boolean column labels (GH47838). return self.data.columns.isin(self.data.select_dtypes(include=np.number)) @doc( @@ -3123,7 +3126,7 @@ def background_gradient( .. 
figure:: ../../_static/style/{image_prefix}_axNone_gmap.png """ if subset is None and gmap is None: - subset = self._get_subset_default() + subset = self._get_numeric_subset_default() self.apply( _background_gradient, @@ -3158,7 +3161,7 @@ def text_gradient( gmap: Sequence | None = None, ) -> Styler: if subset is None and gmap is None: - subset = self._get_subset_default() + subset = self._get_numeric_subset_default() return self.apply( _background_gradient, @@ -3311,7 +3314,7 @@ def bar( raise ValueError(f"`height` must be a value in [0, 100], got {height}") if subset is None: - subset = self._get_subset_default() + subset = self._get_numeric_subset_default() self.apply( _bar, From b1062bf9b1054790fb44ee51d3a38715c73494cc Mon Sep 17 00:00:00 2001 From: bherwerth <108834862+bherwerth@users.noreply.github.com> Date: Tue, 26 Jul 2022 12:22:41 +0000 Subject: [PATCH 5/5] DOC: Add comment to 'whatsnew' --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7f07187e34c78..cc44f43ba8acf 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -1042,6 +1042,7 @@ Styler - Bug in :meth:`Styler.set_sticky` leading to white text on white background in dark mode (:issue:`46984`) - Bug in :meth:`Styler.to_latex` causing ``UnboundLocalError`` when ``clines="all;data"`` and the ``DataFrame`` has no rows. (:issue:`47203`) - Bug in :meth:`Styler.to_excel` when using ``vertical-align: middle;`` with ``xlsxwriter`` engine (:issue:`30107`) +- Bug in :meth:`Styler.background_gradient`, :meth:`Styler.text_gradient` and :meth:`Styler.bar` when ``subset`` is not given and the ``DataFrame`` has boolean column labels (:issue:`47838`) Metadata ^^^^^^^^