Skip to content

API: make hide_columns and hide_index have a consistent signature and function in Styler #41266

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b0bfab7
ENH: make hide_index and hide_columns consistent in functionality
attack68 May 2, 2021
c615b08
DOC: improve specificity
attack68 May 2, 2021
38b7407
TST: add tests for hide_index(subset=)
attack68 May 2, 2021
40504db
TST: add tests for hide_columns(subset=None)
attack68 May 2, 2021
5199248
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 2, 2021
fc423dc
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 5, 2021
67598a4
remove show option
attack68 May 5, 2021
f05757a
remove show option
attack68 May 5, 2021
d870dc6
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 6, 2021
1dfad74
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 12, 2021
57aa0fc
type subset
attack68 May 12, 2021
0f05bc1
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 18, 2021
515707d
docstring for subset
attack68 May 20, 2021
fa8ecee
simpler examples
attack68 May 20, 2021
0704210
nomenclature: hide_columns_
attack68 May 20, 2021
cb58422
nomenclature: hide_index_
attack68 May 20, 2021
4d9626d
nomenclature: comments
attack68 May 20, 2021
461ad30
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 20, 2021
54bb183
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 22, 2021
c6c1086
merge upstream master
attack68 May 22, 2021
16a4da3
fix spelling
attack68 May 23, 2021
f890888
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 24, 2021
ec5817c
adjust to_latex components after recent merge
attack68 May 24, 2021
6da4e1f
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 26, 2021
35c6b65
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 May 29, 2021
7d55fc4
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 Jun 1, 2021
d51a3d7
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 Jun 3, 2021
8095415
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 Jun 5, 2021
0921804
merge upstream master
attack68 Jun 5, 2021
5e0cc0f
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 Jun 5, 2021
a6f811b
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 Jun 9, 2021
dca039d
Merge remote-tracking branch 'upstream/master' into hiding_data_colum…
attack68 Jun 11, 2021
b309358
Merge branch 'rls1.3.0' into hiding_data_columns_and_index
attack68 Jun 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 158 additions & 14 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pandas.util._decorators import doc

import pandas as pd
from pandas import IndexSlice
from pandas.api.types import is_list_like
from pandas.core import generic
import pandas.core.common as com
Expand Down Expand Up @@ -640,7 +641,7 @@ def apply(
def _applymap(self, func: Callable, subset=None, **kwargs) -> Styler:
func = partial(func, **kwargs) # applymap doesn't take kwargs?
if subset is None:
subset = pd.IndexSlice[:]
subset = IndexSlice[:]
subset = non_reducing_slice(subset)
result = self.data.loc[subset].applymap(func)
self._update_ctx(result)
Expand Down Expand Up @@ -1035,37 +1036,180 @@ def set_na_rep(self, na_rep: str) -> StylerRenderer:
self.na_rep = na_rep
return self.format(na_rep=na_rep, precision=self.precision)

def hide_index(self) -> Styler:
def hide_index(self, subset=None, show: bool = False) -> Styler:
"""
Hide any indices from rendering.
Hide the entire index, or specific keys in the index from rendering.

This method has dual functionality:

- if ``subset`` is ``None`` then the entire index will be hidden whilst
displaying all data-rows.
- if a ``subset`` is given then those specific rows will be hidden whilst the
index itself remains visible.

Parameters
----------
subset : IndexSlice
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty confusing. Why is this not just a list-like (of row labels), similar to the argument for .dropna(), for example?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed. pls review

An argument to ``DataFrame.loc[subset, :]``, i.e. along the index, that
identifies which index key rows will be hidden.
show : bool
Indicates whether the function `hides` the index or selected rows,
by default, or operates inversely by exclusively showing them.

Returns
-------
self : Styler

See Also
--------
Styler.hide_columns: Hide the entire column headers row, or specific columns.

Examples
--------
Hide the index and retain the data values:

>>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]])
>>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx)
>>> df.style.format("{:.1f}").hide_index()
x y
a b c a b c
0.1 0.0 0.4 1.3 0.6 -1.4
0.7 1.0 1.3 1.5 -0.0 -0.2
1.4 -0.8 1.6 -0.2 -0.4 -0.3
0.4 1.0 -0.2 -0.8 -1.2 1.1
-0.6 1.2 1.8 1.9 0.3 0.3
0.8 0.5 -0.3 1.2 2.2 -0.8

Hide specific rows but retain the index:

>>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"]))
x y
a b c a b c
x b 0.7 1.0 1.3 1.5 -0.0 -0.2
y b -0.6 1.2 1.8 1.9 0.3 0.3

Hide specific rows and the index:

>>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"]))
... .hide_index()
x y
a b c a b c
0.7 1.0 1.3 1.5 -0.0 -0.2
-0.6 1.2 1.8 1.9 0.3 0.3

Exclusively show specific rows:

>>> df.style.format("{:.1f}")
... .hide_index(subset=(slice(None), ["b"]), show=True)
x y
a b c a b c
x b 0.7 1.0 1.3 1.5 -0.0 -0.2
y b -0.6 1.2 1.8 1.9 0.3 0.3
"""
self.hidden_index = True
if subset is None:
self.hidden_index = not show
else:
subset = IndexSlice[subset, :]
subset = non_reducing_slice(subset)
hide = self.data.loc[subset]
if show: # invert the display
hide = self.data.loc[~self.data.index.isin(hide.index.to_list()), :]
hrows = self.index.get_indexer_for(hide.index)
# error: Incompatible types in assignment (expression has type
# "ndarray", variable has type "Sequence[int]")
self.hidden_rows = hrows # type: ignore[assignment]
return self

def hide_columns(self, subset) -> Styler:
def hide_columns(self, subset=None, show: bool = False) -> Styler:
"""
Hide columns from rendering.
Hide the column headers or specific keys in the columns from rendering.

This method has dual functionality:

- if ``subset`` is ``None`` then the entire column headers row will be hidden
whilst the data-values remain visible.
- if a ``subset`` is given then those specific columns, including the
data-values will be hidden, whilst the column headers row remains visible.

Parameters
----------
subset : IndexSlice
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as above

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I wouldn't object to also allowing an IndexSlice here (in addition to a list-like of column labels).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed. pls review

An argument to ``DataFrame.loc`` that identifies which columns
are hidden.
An argument to ``DataFrame.loc[:, subset]``, i.e. along the columns, that
identifies which column keys will be hidden.
show : bool
Indicates whether the function `hides` the column headers or selected
columns, by default, or operates inversely by exclusively showing them.

Returns
-------
self : Styler

See Also
--------
Styler.hide_index: Hide the entire index, or specific keys in the index.

Examples
--------
Hide column headers and retain the data values:

>>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you show an example first that has a single-level index?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

>>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx)
>>> df.style.format("{:.1f}").hide_columns()
x a 0.1 0.0 0.4 1.3 0.6 -1.4
b 0.7 1.0 1.3 1.5 -0.0 -0.2
c 1.4 -0.8 1.6 -0.2 -0.4 -0.3
y a 0.4 1.0 -0.2 -0.8 -1.2 1.1
b -0.6 1.2 1.8 1.9 0.3 0.3
c 0.8 0.5 -0.3 1.2 2.2 -0.8

Hide specific columns but retain the column headers:

>>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"]))
x y
b b
x a 0.0 0.6
b 1.0 -0.0
c -0.8 -0.4
y a 1.0 -1.2
b 1.2 0.3
c 0.5 2.2

Hide specific columns and the column headers:

>>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"]))
... .hide_columns()
x a 0.0 0.6
b 1.0 -0.0
c -0.8 -0.4
y a 1.0 -1.2
b 1.2 0.3
c 0.5 2.2

Exclusively show specific columns:

>>> df.style.format("{:.1f}")
... .hide_columns(subset=(slice(None), ["b"]), show=True)
x y
b b
x a 0.0 0.6
b 1.0 -0.0
c -0.8 -0.4
y a 1.0 -1.2
b 1.2 0.3
c 0.5 2.2
"""
subset = non_reducing_slice(subset)
hidden_df = self.data.loc[subset]
hcols = self.columns.get_indexer_for(hidden_df.columns)
# error: Incompatible types in assignment (expression has type
# "ndarray", variable has type "Sequence[int]")
self.hidden_columns = hcols # type: ignore[assignment]
if subset is None:
self.hidden_colheads = not show
else:
subset = IndexSlice[:, subset]
subset = non_reducing_slice(subset)
hide = self.data.loc[subset]
if show: # invert the display
hide = self.data.loc[:, ~self.data.columns.isin(hide.columns.to_list())]
hcols = self.columns.get_indexer_for(hide.columns)
# error: Incompatible types in assignment (expression has type
# "ndarray", variable has type "Sequence[int]")
self.hidden_columns = hcols # type: ignore[assignment]
return self

# -----------------------------------------------------------------------
Expand Down
69 changes: 39 additions & 30 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def __init__(

# add rendering variables
self.hidden_index: bool = False
self.hidden_rows: Sequence[int] = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change the implementation to use the same nomenclature, e.g. hidden_index, hidden_columns (or similar)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give your input to the suggestion below:

self.hide_index_: bool
self.hide_columns_: bool
self.hidden_indexes: Sequence[int]
self.hidden_columns: Sequence[int]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated (used rows instead of indexes)

self.hidden_colheads: bool = False
self.hidden_columns: Sequence[int] = []
self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
self.cell_context: DefaultDict[tuple[int, int], str] = defaultdict(str)
Expand Down Expand Up @@ -209,37 +211,38 @@ def _translate_header(

head = []
# 1) column headers
for r in range(self.data.columns.nlevels):
index_blanks = [
_element("th", blank_class, blank_value, not self.hidden_index)
] * (self.data.index.nlevels - 1)

name = self.data.columns.names[r]
column_name = [
_element(
"th",
f"{blank_class if name is None else index_name_class} level{r}",
name if name is not None else blank_value,
not self.hidden_index,
)
]

if clabels:
column_headers = [
if not self.hidden_colheads:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All that was changed here was the addition of `if not self.hidden_colheads:`; the other additions and removals shown in the comparison window are misleading (the existing loop is merely indented one level deeper).

for r in range(self.data.columns.nlevels):
index_blanks = [
_element("th", blank_class, blank_value, not self.hidden_index)
] * (self.data.index.nlevels - 1)

name = self.data.columns.names[r]
column_name = [
_element(
"th",
f"{col_heading_class} level{r} col{c}",
value,
_is_visible(c, r, col_lengths),
attributes=(
f'colspan="{col_lengths.get((r, c), 0)}"'
if col_lengths.get((r, c), 0) > 1
else ""
),
f"{blank_class if name is None else index_name_class} level{r}",
name if name is not None else blank_value,
not self.hidden_index,
)
for c, value in enumerate(clabels[r])
]
head.append(index_blanks + column_name + column_headers)

if clabels:
column_headers = [
_element(
"th",
f"{col_heading_class} level{r} col{c}",
value,
_is_visible(c, r, col_lengths),
attributes=(
f'colspan="{col_lengths.get((r, c), 0)}"'
if col_lengths.get((r, c), 0) > 1
else ""
),
)
for c, value in enumerate(clabels[r])
]
head.append(index_blanks + column_name + column_headers)

# 2) index names
if (
Expand Down Expand Up @@ -281,7 +284,7 @@ def _translate_body(self, data_class, row_heading_class):
<style></style> block
"""
# for sparsifying a MultiIndex
idx_lengths = _get_level_lengths(self.index)
idx_lengths = _get_level_lengths(self.index, self.hidden_rows)

rlabels = self.data.index.tolist()
if self.data.index.nlevels == 1:
Expand Down Expand Up @@ -316,7 +319,7 @@ def _translate_body(self, data_class, row_heading_class):
"td",
f"{data_class} row{r} col{c}{cls}",
value,
(c not in self.hidden_columns),
(c not in self.hidden_columns and r not in self.hidden_rows),
attributes="",
display_value=self._display_funcs[(r, c)](value),
)
Expand Down Expand Up @@ -565,7 +568,13 @@ def _get_level_lengths(index, hidden_elements=None):
last_label = j
lengths[(i, last_label)] = 0
elif j not in hidden_elements:
lengths[(i, last_label)] += 1
if lengths[(i, last_label)] == 0:
# if the previous iteration was first-of-kind but hidden then offset
last_label = j
lengths[(i, last_label)] = 1
else:
# else add to previous iteration
lengths[(i, last_label)] += 1

non_zero_lengths = {
element: length for element, length in lengths.items() if length >= 1
Expand Down
25 changes: 23 additions & 2 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,14 @@ def test_mi_sparse_column_names(self):
]
assert head == expected

def test_hide_column_headers(self):
ctx = self.styler.hide_columns()._translate()
assert len(ctx["head"]) == 0 # no header entries with an unnamed index

self.df.index.name = "some_name"
ctx = self.df.style.hide_columns()._translate()
assert len(ctx["head"]) == 1 # only a single row for index names: no col heads

def test_hide_single_index(self):
# GH 14194
# single unnamed index
Expand Down Expand Up @@ -1120,7 +1128,7 @@ def test_hide_columns_single_level(self):
assert not ctx["body"][0][1]["is_visible"] # col A, row 1
assert not ctx["body"][1][2]["is_visible"] # col B, row 1

def test_hide_columns_mult_levels(self):
def test_hide_columns_index_mult_levels(self):
# GH 14194
# setup dataframe with multiple column levels and indices
i1 = pd.MultiIndex.from_arrays(
Expand Down Expand Up @@ -1152,7 +1160,8 @@ def test_hide_columns_mult_levels(self):

# hide first column only
ctx = df.style.hide_columns([("b", 0)])._translate()
assert ctx["head"][0][2]["is_visible"] # b
assert not ctx["head"][0][2]["is_visible"] # b
assert ctx["head"][0][3]["is_visible"] # b
assert not ctx["head"][1][2]["is_visible"] # 0
assert not ctx["body"][1][2]["is_visible"] # 3
assert ctx["body"][1][3]["is_visible"]
Expand All @@ -1168,6 +1177,18 @@ def test_hide_columns_mult_levels(self):
assert ctx["body"][1][2]["is_visible"]
assert ctx["body"][1][2]["display_value"] == 3

# hide top row level, which hides both rows
ctx = df.style.hide_index("a")._translate()
for i in [0, 1, 2, 3]:
assert not ctx["body"][0][i]["is_visible"]
assert not ctx["body"][1][i]["is_visible"]

# hide first row only
ctx = df.style.hide_index(("a", 0))._translate()
for i in [0, 1, 2, 3]:
assert not ctx["body"][0][i]["is_visible"]
assert ctx["body"][1][i]["is_visible"]

def test_pipe(self):
def set_caption_from_template(styler, a, b):
return styler.set_caption(f"Dataframe with a = {a} and b = {b}")
Expand Down