From fc0a7a3f2922e51922acf1bc6e33172fc7ca8156 Mon Sep 17 00:00:00 2001 From: attack68 <24256554+attack68@users.noreply.github.com> Date: Wed, 16 Jun 2021 02:22:53 +0200 Subject: [PATCH] Backport PR #41266: API: make `hide_columns` and `hide_index` have a consistent signature and function in `Styler` --- pandas/io/formats/style.py | 173 +++++++++++++++++--- pandas/io/formats/style_render.py | 97 ++++++----- pandas/tests/io/formats/style/test_style.py | 33 +++- 3 files changed, 235 insertions(+), 68 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c5aba539bd2dd..e4f0825d24828 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -31,7 +31,10 @@ from pandas.util._decorators import doc import pandas as pd -from pandas import RangeIndex +from pandas import ( + IndexSlice, + RangeIndex, +) from pandas.api.types import is_list_like from pandas.core import generic import pandas.core.common as com @@ -682,7 +685,7 @@ def to_latex( self.data.columns = RangeIndex(stop=len(self.data.columns)) numeric_cols = self.data._get_numeric_data().columns.to_list() self.data.columns = _original_columns - column_format = "" if self.hidden_index else "l" * self.data.index.nlevels + column_format = "" if self.hide_index_ else "l" * self.data.index.nlevels for ci, _ in enumerate(self.data.columns): if ci not in self.hidden_columns: column_format += ( @@ -926,7 +929,7 @@ def _copy(self, deepcopy: bool = False) -> Styler: ) styler.uuid = self.uuid - styler.hidden_index = self.hidden_index + styler.hide_index_ = self.hide_index_ if deepcopy: styler.ctx = copy.deepcopy(self.ctx) @@ -965,7 +968,7 @@ def clear(self) -> None: self.cell_context.clear() self._todo.clear() - self.hidden_index = False + self.hide_index_ = False self.hidden_columns = [] # self.format and self.table_styles may be dependent on user # input in self.__init__() @@ -1096,7 +1099,7 @@ def _applymap( ) -> Styler: func = partial(func, **kwargs) # applymap doesn't take 
kwargs? if subset is None: - subset = pd.IndexSlice[:] + subset = IndexSlice[:] subset = non_reducing_slice(subset) result = self.data.loc[subset].applymap(func) self._update_ctx(result) @@ -1511,37 +1514,169 @@ def set_na_rep(self, na_rep: str) -> StylerRenderer: self.na_rep = na_rep return self.format(na_rep=na_rep, precision=self.precision) - def hide_index(self) -> Styler: + def hide_index(self, subset: Subset | None = None) -> Styler: """ - Hide any indices from rendering. + Hide the entire index, or specific keys in the index from rendering. + + This method has dual functionality: + + - if ``subset`` is ``None`` then the entire index will be hidden whilst + displaying all data-rows. + - if a ``subset`` is given then those specific rows will be hidden whilst the + index itself remains visible. + + .. versionchanged:: 1.3.0 + + Parameters + ---------- + subset : label, array-like, IndexSlice, optional + A valid 1d input or single key along the index axis within + `DataFrame.loc[<subset>, :]`, to limit ``data`` to *before* applying + the function. Returns ------- self : Styler + + See Also + -------- + Styler.hide_columns: Hide the entire column headers row, or specific columns.
+ + Examples + -------- + Simple application hiding specific rows: + + >>> df = pd.DataFrame([[1,2], [3,4], [5,6]], index=["a", "b", "c"]) + >>> df.style.hide_index(["a", "b"]) + 0 1 + c 5 6 + + Hide the index and retain the data values: + + >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]]) + >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx) + >>> df.style.format("{:.1f}").hide_index() + x y + a b c a b c + 0.1 0.0 0.4 1.3 0.6 -1.4 + 0.7 1.0 1.3 1.5 -0.0 -0.2 + 1.4 -0.8 1.6 -0.2 -0.4 -0.3 + 0.4 1.0 -0.2 -0.8 -1.2 1.1 + -0.6 1.2 1.8 1.9 0.3 0.3 + 0.8 0.5 -0.3 1.2 2.2 -0.8 + + Hide specific rows but retain the index: + + >>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"])) + x y + a b c a b c + x b 0.7 1.0 1.3 1.5 -0.0 -0.2 + y b -0.6 1.2 1.8 1.9 0.3 0.3 + + Hide specific rows and the index: + + >>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"])) + ... .hide_index() + x y + a b c a b c + 0.7 1.0 1.3 1.5 -0.0 -0.2 + -0.6 1.2 1.8 1.9 0.3 0.3 """ - self.hidden_index = True + if subset is None: + self.hide_index_ = True + else: + subset_ = IndexSlice[subset, :] # new var so mypy reads not Optional + subset = non_reducing_slice(subset_) + hide = self.data.loc[subset] + hrows = self.index.get_indexer_for(hide.index) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Sequence[int]") + self.hidden_rows = hrows # type: ignore[assignment] return self - def hide_columns(self, subset: Subset) -> Styler: + def hide_columns(self, subset: Subset | None = None) -> Styler: """ - Hide columns from rendering. + Hide the column headers or specific keys in the columns from rendering. + + This method has dual functionality: + + - if ``subset`` is ``None`` then the entire column headers row will be hidden + whilst the data-values remain visible. 
+ - if a ``subset`` is given then those specific columns, including the + data-values will be hidden, whilst the column headers row remains visible. + + .. versionchanged:: 1.3.0 Parameters ---------- - subset : label, array-like, IndexSlice - A valid 1d input or single key along the appropriate axis within - `DataFrame.loc[<subset>]`, to limit ``data`` to *before* applying the function. + subset : label, array-like, IndexSlice, optional + A valid 1d input or single key along the columns axis within + `DataFrame.loc[:, <subset>]`, to limit ``data`` to *before* applying + the function. Returns ------- self : Styler + + See Also + -------- + Styler.hide_index: Hide the entire index, or specific keys in the index. + + Examples + -------- + Simple application hiding specific columns: + + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) + >>> df.style.hide_columns(["a", "b"]) + c + 0 3 + 1 6 + + Hide column headers and retain the data values: + + >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]]) + >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx) + >>> df.style.format("{:.1f}").hide_columns() + x a 0.1 0.0 0.4 1.3 0.6 -1.4 + b 0.7 1.0 1.3 1.5 -0.0 -0.2 + c 1.4 -0.8 1.6 -0.2 -0.4 -0.3 + y a 0.4 1.0 -0.2 -0.8 -1.2 1.1 + b -0.6 1.2 1.8 1.9 0.3 0.3 + c 0.8 0.5 -0.3 1.2 2.2 -0.8 + + Hide specific columns but retain the column headers: + + >>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"])) + x y + b b + x a 0.0 0.6 + b 1.0 -0.0 + c -0.8 -0.4 + y a 1.0 -1.2 + b 1.2 0.3 + c 0.5 2.2 + + Hide specific columns and the column headers: + + >>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"])) + ...
.hide_columns() + x a 0.0 0.6 + b 1.0 -0.0 + c -0.8 -0.4 + y a 1.0 -1.2 + b 1.2 0.3 + c 0.5 2.2 """ - subset = non_reducing_slice(subset) - hidden_df = self.data.loc[subset] - hcols = self.columns.get_indexer_for(hidden_df.columns) - # error: Incompatible types in assignment (expression has type - # "ndarray", variable has type "Sequence[int]") - self.hidden_columns = hcols # type: ignore[assignment] + if subset is None: + self.hide_columns_ = True + else: + subset_ = IndexSlice[:, subset] # new var so mypy reads not Optional + subset = non_reducing_slice(subset_) + hide = self.data.loc[subset] + hcols = self.columns.get_indexer_for(hide.columns) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Sequence[int]") + self.hidden_columns = hcols # type: ignore[assignment] return self # ----------------------------------------------------------------------- diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 7686d8a340c37..514597d27a92b 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -97,7 +97,9 @@ def __init__( self.cell_ids = cell_ids # add rendering variables - self.hidden_index: bool = False + self.hide_index_: bool = False # bools for hiding col/row headers + self.hide_columns_: bool = False + self.hidden_rows: Sequence[int] = [] # sequence for specific hidden rows/cols self.hidden_columns: Sequence[int] = [] self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list) self.cell_context: DefaultDict[tuple[int, int], str] = defaultdict(str) @@ -297,55 +299,56 @@ def _translate_header( head = [] # 1) column headers - for r in range(self.data.columns.nlevels): - index_blanks = [ - _element("th", blank_class, blank_value, not self.hidden_index) - ] * (self.data.index.nlevels - 1) - - name = self.data.columns.names[r] - column_name = [ - _element( - "th", - f"{blank_class if name is None else index_name_class} level{r}", - name if name is 
not None else blank_value, - not self.hidden_index, - ) - ] - - if clabels: - column_headers = [ + if not self.hide_columns_: + for r in range(self.data.columns.nlevels): + index_blanks = [ + _element("th", blank_class, blank_value, not self.hide_index_) + ] * (self.data.index.nlevels - 1) + + name = self.data.columns.names[r] + column_name = [ _element( "th", - f"{col_heading_class} level{r} col{c}", - value, - _is_visible(c, r, col_lengths), - attributes=( - f'colspan="{col_lengths.get((r, c), 0)}"' - if col_lengths.get((r, c), 0) > 1 - else "" - ), + f"{blank_class if name is None else index_name_class} level{r}", + name if name is not None else blank_value, + not self.hide_index_, ) - for c, value in enumerate(clabels[r]) ] - if len(self.data.columns) > max_cols: - # add an extra column with `...` value to indicate trimming - column_headers.append( + if clabels: + column_headers = [ _element( "th", - f"{col_heading_class} level{r} {trimmed_col_class}", - "...", - True, - attributes="", + f"{col_heading_class} level{r} col{c}", + value, + _is_visible(c, r, col_lengths), + attributes=( + f'colspan="{col_lengths.get((r, c), 0)}"' + if col_lengths.get((r, c), 0) > 1 + else "" + ), ) - ) - head.append(index_blanks + column_name + column_headers) + for c, value in enumerate(clabels[r]) + ] + + if len(self.data.columns) > max_cols: + # add an extra column with `...` value to indicate trimming + column_headers.append( + _element( + "th", + f"{col_heading_class} level{r} {trimmed_col_class}", + "...", + True, + attributes="", + ) + ) + head.append(index_blanks + column_name + column_headers) # 2) index names if ( self.data.index.names and com.any_not_none(*self.data.index.names) - and not self.hidden_index + and not self.hide_index_ ): index_names = [ _element( @@ -411,7 +414,9 @@ def _translate_body( The associated HTML elements needed for template rendering. 
""" # for sparsifying a MultiIndex - idx_lengths = _get_level_lengths(self.index, sparsify_index, max_rows) + idx_lengths = _get_level_lengths( + self.index, sparsify_index, max_rows, self.hidden_rows + ) rlabels = self.data.index.tolist()[:max_rows] # slice to allow trimming if self.data.index.nlevels == 1: @@ -425,7 +430,7 @@ def _translate_body( "th", f"{row_heading_class} level{c} {trimmed_row_class}", "...", - not self.hidden_index, + not self.hide_index_, attributes="", ) for c in range(self.data.index.nlevels) @@ -462,7 +467,7 @@ def _translate_body( "th", f"{row_heading_class} level{c} row{r}", value, - (_is_visible(r, c, idx_lengths) and not self.hidden_index), + (_is_visible(r, c, idx_lengths) and not self.hide_index_), id=f"level{c}_row{r}", attributes=( f'rowspan="{idx_lengths.get((c, r), 0)}"' @@ -496,7 +501,7 @@ def _translate_body( "td", f"{data_class} row{r} col{c}{cls}", value, - (c not in self.hidden_columns), + (c not in self.hidden_columns and r not in self.hidden_rows), attributes="", display_value=self._display_funcs[(r, c)](value), ) @@ -527,7 +532,7 @@ def _translate_latex(self, d: dict) -> None: d["head"] = [[col for col in row if col["is_visible"]] for row in d["head"]] body = [] for r, row in enumerate(d["body"]): - if self.hidden_index: + if self.hide_index_: row_body_headers = [] else: row_body_headers = [ @@ -842,7 +847,13 @@ def _get_level_lengths( last_label = j lengths[(i, last_label)] = 0 elif j not in hidden_elements: - lengths[(i, last_label)] += 1 + if lengths[(i, last_label)] == 0: + # if the previous iteration was first-of-kind but hidden then offset + last_label = j + lengths[(i, last_label)] = 1 + else: + # else add to previous iteration + lengths[(i, last_label)] += 1 non_zero_lengths = { element: length for element, length in lengths.items() if length >= 1 diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 281170ab6c7cb..0516aa6029487 100644 --- 
a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -221,7 +221,7 @@ def test_copy(self, do_changes, do_render): [{"selector": "th", "props": [("foo", "bar")]}] ) self.styler.set_table_attributes('class="foo" data-bar') - self.styler.hidden_index = not self.styler.hidden_index + self.styler.hide_index_ = not self.styler.hide_index_ self.styler.hide_columns("A") classes = DataFrame( [["favorite-val red", ""], [None, "blue my-val"]], @@ -292,7 +292,7 @@ def test_copy(self, do_changes, do_render): "table_styles", "table_attributes", "cell_ids", - "hidden_index", + "hide_index_", "hidden_columns", "cell_context", ] @@ -317,7 +317,7 @@ def test_clear(self): assert len(s._todo) > 0 assert s.tooltips assert len(s.cell_context) > 0 - assert s.hidden_index is True + assert s.hide_index_ is True assert len(s.hidden_columns) > 0 s = s._compute() @@ -331,7 +331,7 @@ def test_clear(self): assert len(s._todo) == 0 assert not s.tooltips assert len(s.cell_context) == 0 - assert s.hidden_index is False + assert s.hide_index_ is False assert len(s.hidden_columns) == 0 def test_render(self): @@ -1127,6 +1127,14 @@ def test_mi_sparse_column_names(self): ] assert head == expected + def test_hide_column_headers(self): + ctx = self.styler.hide_columns()._translate(True, True) + assert len(ctx["head"]) == 0 # no header entries with an unnamed index + + self.df.index.name = "some_name" + ctx = self.df.style.hide_columns()._translate(True, True) + assert len(ctx["head"]) == 1 # only a single row for index names: no col heads + def test_hide_single_index(self): # GH 14194 # single unnamed index @@ -1195,7 +1203,7 @@ def test_hide_columns_single_level(self): assert not ctx["body"][0][1]["is_visible"] # col A, row 1 assert not ctx["body"][1][2]["is_visible"] # col B, row 1 - def test_hide_columns_mult_levels(self): + def test_hide_columns_index_mult_levels(self): # GH 14194 # setup dataframe with multiple column levels and indices i1 = 
MultiIndex.from_arrays( @@ -1227,7 +1235,8 @@ def test_hide_columns_mult_levels(self): # hide first column only ctx = df.style.hide_columns([("b", 0)])._translate(True, True) - assert ctx["head"][0][2]["is_visible"] # b + assert not ctx["head"][0][2]["is_visible"] # b + assert ctx["head"][0][3]["is_visible"] # b assert not ctx["head"][1][2]["is_visible"] # 0 assert not ctx["body"][1][2]["is_visible"] # 3 assert ctx["body"][1][3]["is_visible"] @@ -1243,6 +1252,18 @@ def test_hide_columns_mult_levels(self): assert ctx["body"][1][2]["is_visible"] assert ctx["body"][1][2]["display_value"] == 3 + # hide top row level, which hides both rows + ctx = df.style.hide_index("a")._translate(True, True) + for i in [0, 1, 2, 3]: + assert not ctx["body"][0][i]["is_visible"] + assert not ctx["body"][1][i]["is_visible"] + + # hide first row only + ctx = df.style.hide_index(("a", 0))._translate(True, True) + for i in [0, 1, 2, 3]: + assert not ctx["body"][0][i]["is_visible"] + assert ctx["body"][1][i]["is_visible"] + def test_pipe(self): def set_caption_from_template(styler, a, b): return styler.set_caption(f"Dataframe with a = {a} and b = {b}")