Skip to content

API: Styler.hide_columns replaced by .hide_values which also operates row-wise #41158

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
79 changes: 72 additions & 7 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pandas.util._decorators import doc

import pandas as pd
from pandas import IndexSlice
from pandas.api.types import is_list_like
from pandas.core import generic
import pandas.core.common as com
Expand Down Expand Up @@ -640,7 +641,7 @@ def apply(
def _applymap(self, func: Callable, subset=None, **kwargs) -> Styler:
func = partial(func, **kwargs) # applymap doesn't take kwargs?
if subset is None:
subset = pd.IndexSlice[:]
subset = IndexSlice[:]
subset = non_reducing_slice(subset)
result = self.data.loc[subset].applymap(func)
self._update_ctx(result)
Expand Down Expand Up @@ -1060,12 +1061,76 @@ def hide_columns(self, subset) -> Styler:
-------
self : Styler
"""
subset = non_reducing_slice(subset)
hidden_df = self.data.loc[subset]
hcols = self.columns.get_indexer_for(hidden_df.columns)
# error: Incompatible types in assignment (expression has type
# "ndarray", variable has type "Sequence[int]")
self.hidden_columns = hcols # type: ignore[assignment]
return self.hide_values(subset)

def hide_values(self, subset, axis: Axis = "columns", show: bool = False) -> Styler:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this a very confusing name; `hide_axis` is more appropriate.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The trick is to distinguish between the cases of:

a) hiding select rows or columns of data (whilst other index keys or column headers are visible),
b) or displaying the data values but just hiding the index or column headers row in their entirety.

Currently hide_columns does a) whilst hide_index does b). The complete idea was that:

i) hide_values would do a) for either the index or columns axes.
ii) hide_headers (a new method) would do b) for either the index or columns axes.

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback I created an alternative in #41266 which seeks to achieve the same functionality in a different way, re-using existing methods and kwargs. Close this PR if you prefer the alternative.

"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't make this user-visible; instead prefer `hide_index` and `hide_columns` (this can be the implementation).

Hide (or exclusively show) columns or rows upon rendering.

Parameters
----------
subset : IndexSlice
A valid input to a specific ``axis`` in ``DataFrame.loc`` that identifies
which columns or rows are hidden/shown.
axis : {0 or 'index', 1 or 'columns'}, default 'columns'
Axis along which the ``subset`` is applied.
show : bool, default False
Indicates whether the supplied subset should be hidden (``False``), or
exclusively shown with everything else on that axis hidden (``True``).

Returns
-------
self : Styler

Examples
--------
>>> df = DataFrame([[1, 2], [3, 4]], columns=["c1", "c2"], index=["i1", "i2"])
>>> df.style.hide_values("c1")
c2
i1 2
i2 4

>>> df.style.hide_values("i1", axis="index")
c1 c2
i2 3 4

>>> df.style.hide_values("i1", axis="index", show=True)
c1 c2
i1 1 2

>>> mcols = MultiIndex.from_product([["c1", "c2"], ["d1", "d2", "d3"]])
>>> data = np.arange(12).reshape((2,6))
>>> df = DataFrame(data, columns=mcols, index=["i1", "i2"])
>>> df.style.hide_values(subset=(slice(None), slice("d2", "d3")))
c1 c2
d1 d1
i1 0 3
i2 6 9
"""
if axis in [0, "index"]:
subset = IndexSlice[subset, :]
subset = non_reducing_slice(subset)
hide = self.data.loc[subset]
if show: # invert the display
hide = self.data.loc[~self.data.index.isin(hide.index.to_list()), :]
hrows = self.index.get_indexer_for(hide.index)
# error: Incompatible types in assignment (expression has type
# "ndarray", variable has type "Sequence[int]")
self.hidden_rows = hrows # type: ignore[assignment]
elif axis in [1, "columns"]:
subset = IndexSlice[:, subset]
subset = non_reducing_slice(subset)
hide = self.data.loc[subset]
if show: # invert the display
hide = self.data.loc[:, ~self.data.columns.isin(hide.columns.to_list())]
hcols = self.columns.get_indexer_for(hide.columns)
# error: Incompatible types in assignment (expression has type
# "ndarray", variable has type "Sequence[int]")
self.hidden_columns = hcols # type: ignore[assignment]
else:
raise ValueError(
f"`axis` must be one of [0, 1] or 'index' or 'columns', got: {axis}"
)
return self

# -----------------------------------------------------------------------
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def __init__(
# add rendering variables
self.hidden_index: bool = False
self.hidden_columns: Sequence[int] = []
self.hidden_rows: Sequence[int] = []
self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
self.cell_context: DefaultDict[tuple[int, int], str] = defaultdict(str)
self._todo: list[tuple[Callable, tuple, dict]] = []
Expand Down Expand Up @@ -281,7 +282,7 @@ def _translate_body(self, data_class, row_heading_class):
<style></style> block
"""
# for sparsifying a MultiIndex
idx_lengths = _get_level_lengths(self.index)
idx_lengths = _get_level_lengths(self.index, self.hidden_rows)

rlabels = self.data.index.tolist()
if self.data.index.nlevels == 1:
Expand Down Expand Up @@ -316,7 +317,7 @@ def _translate_body(self, data_class, row_heading_class):
"td",
f"{data_class} row{r} col{c}{cls}",
value,
(c not in self.hidden_columns),
(c not in self.hidden_columns and r not in self.hidden_rows),
attributes="",
display_value=self._display_funcs[(r, c)](value),
)
Expand Down
111 changes: 111 additions & 0 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,117 @@ def test_non_reducing_multi_slice_on_multiindex(self, slice_):
result = df.loc[non_reducing_slice(slice_)]
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "subset",
    [
        pd.Series(["i1", "i2"]),
        np.array(["i1", "i2"]),
        pd.Index(["i1", "i2"]),
        ["i1", "i2"],
        pd.IndexSlice["i1":"i2"],  # type: ignore[misc]
    ],
)
def test_hide_values_index(self, subset):
    """Hiding rows ``i1`` and ``i2`` leaves only row ``i3`` in the rendered HTML.

    Each parametrized ``subset`` is a different spelling of the same two row
    labels; all of them must produce the same rendering.
    """
    row_labels = ["i1", "i2", "i3"]
    values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    df = DataFrame(values, index=row_labels, columns=["c1", "c2", "c3"])

    styler = Styler(df, uuid_len=0, cell_ids=False)
    styler.hide_values(subset=subset, axis="index")
    result = styler.render()

    # positional indices of the rows expected to be absent from the output
    hidden_rows = {0, 1}

    # row headers: present only for rows that were not hidden
    for r, label in enumerate(row_labels):
        header = (
            f'<th id="T__level0_row{r}" class="row_heading level0 row{r}" >'
            f"{label}</th>"
        )
        assert (header in result) == (r not in hidden_rows)

    # data cells: every cell of a hidden row is absent, all others are present
    for r, row in enumerate(values):
        for c, value in enumerate(row):
            cell = f'<td class="data row{r} col{c}" >{value}</td>'
            assert (cell in result) == (r not in hidden_rows)

@pytest.mark.parametrize(
    "subset",
    [
        pd.Series(["c1", "c2"]),
        np.array(["c1", "c2"]),
        pd.Index(["c1", "c2"]),
        ["c1", "c2"],
        pd.IndexSlice["c1":"c2"],  # type: ignore[misc]
    ],
)
def test_hide_values_columns(self, subset):
    """Hiding columns ``c1`` and ``c2`` leaves only ``c3`` in the rendered HTML.

    Each parametrized ``subset`` is a different spelling of the same two column
    labels; all of them must produce the same rendering.
    """
    col_labels = ["c1", "c2", "c3"]
    values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    df = DataFrame(values, index=["i1", "i2", "i3"], columns=col_labels)

    styler = Styler(df, uuid_len=0, cell_ids=False)
    styler.hide_values(subset=subset, axis="columns")
    result = styler.render()

    # positional indices of the columns expected to be absent from the output
    hidden_cols = {0, 1}

    # column headers: present only for columns that were not hidden
    for c, label in enumerate(col_labels):
        header = f'<th class="col_heading level0 col{c}" >{label}</th>'
        assert (header in result) == (c not in hidden_cols)

    # data cells: every cell of a hidden column is absent, all others present
    for r, row in enumerate(values):
        for c, value in enumerate(row):
            cell = f'<td class="data row{r} col{c}" >{value}</td>'
            assert (cell in result) == (c not in hidden_cols)

def test_hide_values_multiindex(self):
    """Check hide and show modes on a frame with MultiIndex rows and columns.

    The same two subsets (second-level row label ``j1`` and top-level column
    ``c1``) are applied once in hide mode and once in show mode; the header
    and data fragments expected in the HTML are complementary between the two.
    """
    idx = pd.MultiIndex.from_product([["i1", "i2"], ["j1", "j2"]])
    col = pd.MultiIndex.from_product([["c1", "c2"], ["d1", "d2"]])
    df = DataFrame(np.arange(16).reshape((4, 4)), columns=col, index=idx)

    # (show flag, headers that must be absent, values absent, values present)
    scenarios = [
        (
            False,  # hide the subset
            [">c1<", ">j1<"],
            [0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13],
            [6, 7, 14, 15],
        ),
        (
            True,  # exclusively show the subset
            [">c2<", ">j2<"],
            [2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15],
            [0, 1, 8, 9],
        ),
    ]

    for show, absent_headers, absent_values, present_values in scenarios:
        styler = Styler(df, uuid_len=0, cell_ids=False)
        styler = styler.hide_values(
            subset=(slice(None), "j1"), axis="index", show=show
        )
        styler = styler.hide_values(subset="c1", axis="columns", show=show)
        result = styler.render()

        for header in absent_headers:
            assert header not in result
        for data in absent_values:
            assert f">{data}<" not in result
        for data in present_values:
            assert f">{data}<" in result


def test_block_names():
# catch accidental removal of a block
Expand Down