diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index 1193dff4361b4..59319fda8045c 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -38,11 +38,11 @@ and so passing in a substring will work - as long as it is unambiguous: .. ipython:: python - pd.get_option("display.max_rows") - pd.set_option("display.max_rows", 101) - pd.get_option("display.max_rows") - pd.set_option("max_r", 102) - pd.get_option("display.max_rows") + pd.get_option("display.chop_threshold") + pd.set_option("display.chop_threshold", 2) + pd.get_option("display.chop_threshold") + pd.set_option("chop", 4) + pd.get_option("display.chop_threshold") The following will **not work** because it matches multiple option names, e.g. @@ -52,7 +52,7 @@ The following will **not work** because it matches multiple option names, e.g. :okexcept: try: - pd.get_option("column") + pd.get_option("max") except KeyError as e: print(e) @@ -153,27 +153,27 @@ lines are replaced by an ellipsis. .. ipython:: python df = pd.DataFrame(np.random.randn(7, 2)) - pd.set_option("max_rows", 7) + pd.set_option("display.max_rows", 7) df - pd.set_option("max_rows", 5) + pd.set_option("display.max_rows", 5) df - pd.reset_option("max_rows") + pd.reset_option("display.max_rows") Once the ``display.max_rows`` is exceeded, the ``display.min_rows`` options determines how many rows are shown in the truncated repr. .. 
ipython:: python - pd.set_option("max_rows", 8) - pd.set_option("min_rows", 4) + pd.set_option("display.max_rows", 8) + pd.set_option("display.min_rows", 4) # below max_rows -> all rows shown df = pd.DataFrame(np.random.randn(7, 2)) df # above max_rows -> only min_rows (4) rows shown df = pd.DataFrame(np.random.randn(9, 2)) df - pd.reset_option("max_rows") - pd.reset_option("min_rows") + pd.reset_option("display.max_rows") + pd.reset_option("display.min_rows") ``display.expand_frame_repr`` allows for the representation of dataframes to stretch across pages, wrapped over the full column vs row-wise. @@ -193,13 +193,13 @@ dataframes to stretch across pages, wrapped over the full column vs row-wise. .. ipython:: python df = pd.DataFrame(np.random.randn(10, 10)) - pd.set_option("max_rows", 5) + pd.set_option("display.max_rows", 5) pd.set_option("large_repr", "truncate") df pd.set_option("large_repr", "info") df pd.reset_option("large_repr") - pd.reset_option("max_rows") + pd.reset_option("display.max_rows") ``display.max_colwidth`` sets the maximum width of columns. Cells of this length or longer will be truncated with an ellipsis. @@ -491,6 +491,10 @@ styler.render.repr html Standard output format for Should be one of "html" or "latex". styler.render.max_elements 262144 Maximum number of datapoints that Styler will render trimming either rows, columns or both to fit. +styler.render.max_rows None Maximum number of rows that Styler will render. By default + this is dynamic based on ``max_elements``. +styler.render.max_columns None Maximum number of columns that Styler will render. By default + this is dynamic based on ``max_elements``. styler.render.encoding utf-8 Default encoding for output HTML or LaTeX files. styler.format.formatter None Object to specify formatting functions to ``Styler.format``. styler.format.na_rep None String representation for missing data. 
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index cac22fc06b89b..f183bb1fe391e 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -75,7 +75,7 @@ Styler - Styling of indexing has been added, with :meth:`.Styler.apply_index` and :meth:`.Styler.applymap_index`. These mirror the signature of the methods already used to style data values, and work with both HTML and LaTeX format (:issue:`41893`). - :meth:`.Styler.bar` introduces additional arguments to control alignment and display (:issue:`26070`, :issue:`36419`), and it also validates the input arguments ``width`` and ``height`` (:issue:`42511`). - :meth:`.Styler.to_latex` introduces keyword argument ``environment``, which also allows a specific "longtable" entry through a separate jinja2 template (:issue:`41866`). - - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption`` (:issue:`41946`, :issue:`43149`). + - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption``, ``max_rows`` and ``max_columns`` (:issue:`41946`, :issue:`43149`, :issue:`42972`). - Keyword arguments ``level`` and ``names`` added to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for additional control of visibility of MultiIndexes and index names (:issue:`25475`, :issue:`43404`, :issue:`43346`) - Global options have been extended to configure default ``Styler`` properties including formatting and encoding and mathjax options and LaTeX (:issue:`41395`) - Naive sparsification is now possible for LaTeX without the multirow package (:issue:`43369`) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 09da9f04f8360..cf41bcff3d0c8 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -769,6 +769,18 @@ def register_converter_cb(key): trimming will occur over columns, rows or both if needed. 
""" +styler_max_rows = """ +: int, optional + The maximum number of rows that will be rendered. May still be reduced to + satisfy ``max_elements``, which takes precedence. +""" + +styler_max_columns = """ +: int, optional + The maximum number of columns that will be rendered. May still be reduced to + satisfy ``max_elements``, which takes precedence. +""" + styler_precision = """ : int The precision for floats and complex numbers. @@ -847,6 +859,20 @@ def register_converter_cb(key): validator=is_nonnegative_int, ) + cf.register_option( + "render.max_rows", + None, + styler_max_rows, + validator=is_nonnegative_int, + ) + + cf.register_option( + "render.max_columns", + None, + styler_max_columns, + validator=is_nonnegative_int, + ) + cf.register_option("render.encoding", "utf-8", styler_encoding, validator=is_str) cf.register_option("format.decimal", ".", styler_decimal, validator=is_str) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 2a063501976da..c10ac07d452a8 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -885,6 +885,8 @@ def to_html( sparse_columns: bool | None = None, bold_headers: bool = False, caption: str | None = None, + max_rows: int | None = None, + max_columns: int | None = None, encoding: str | None = None, doctype_html: bool = False, exclude_styles: bool = False, @@ -930,6 +932,20 @@ def to_html( caption : str, optional Set, or overwrite, the caption on Styler before rendering. + .. versionadded:: 1.4.0 + max_rows : int, optional + The maximum number of rows that will be rendered. Defaults to + ``pandas.options.styler.render.max_rows``, which is None. + + .. versionadded:: 1.4.0 + max_columns : int, optional + The maximum number of columns that will be rendered. Defaults to + ``pandas.options.styler.render.max_columns``, which is None. + + Rows and columns may be reduced if the number of total elements is + large. 
This value is set to ``pandas.options.styler.render.max_elements``, + which is 262144 (18 bit browser rendering). + .. versionadded:: 1.4.0 encoding : str, optional Character encoding setting for file output, and HTML meta tags. @@ -981,6 +997,8 @@ def to_html( html = obj._render_html( sparse_index=sparse_index, sparse_columns=sparse_columns, + max_rows=max_rows, + max_cols=max_columns, exclude_styles=exclude_styles, encoding=encoding, doctype_html=doctype_html, diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 8c5af730a5fc7..bd360e266f897 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -117,14 +117,21 @@ def __init__( tuple[int, int], Callable[[Any], str] ] = defaultdict(lambda: partial(_default_formatter, precision=precision)) - def _render_html(self, sparse_index: bool, sparse_columns: bool, **kwargs) -> str: + def _render_html( + self, + sparse_index: bool, + sparse_columns: bool, + max_rows: int | None = None, + max_cols: int | None = None, + **kwargs, + ) -> str: """ Renders the ``Styler`` including all applied styles to HTML. Generates a dict with necessary kwargs passed to jinja2 template. """ self._compute() # TODO: namespace all the pandas keys - d = self._translate(sparse_index, sparse_columns) + d = self._translate(sparse_index, sparse_columns, max_rows, max_cols) d.update(kwargs) return self.template_html.render( **d, @@ -166,7 +173,14 @@ def _compute(self): r = func(self)(*args, **kwargs) return r - def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = " "): + def _translate( + self, + sparse_index: bool, + sparse_cols: bool, + max_rows: int | None = None, + max_cols: int | None = None, + blank: str = " ", + ): """ Process Styler data and settings into a dict for template rendering. 
@@ -181,6 +195,10 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "  sparse_cols : bool Whether to sparsify the columns or print all hierarchical column elements. Upstream defaults are typically to `pandas.options.styler.sparse.columns`. + blank : str + Entry to top-left blank cells. + max_rows, max_cols : int, optional + Specific max rows and cols. max_elements always take precedence in render. Returns ------- @@ -206,8 +224,14 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "  } max_elements = get_option("styler.render.max_elements") + max_rows = max_rows if max_rows else get_option("styler.render.max_rows") + max_cols = max_cols if max_cols else get_option("styler.render.max_columns") max_rows, max_cols = _get_trimming_maximums( - len(self.data.index), len(self.data.columns), max_elements + len(self.data.index), + len(self.data.columns), + max_elements, + max_rows, + max_cols, ) self.cellstyle_map_columns: DefaultDict[ @@ -831,7 +855,14 @@ def _element( } -def _get_trimming_maximums(rn, cn, max_elements, scaling_factor=0.8): +def _get_trimming_maximums( + rn, + cn, + max_elements, + max_rows=None, + max_cols=None, + scaling_factor=0.8, +) -> tuple[int, int]: """ Recursively reduce the number of rows and columns to satisfy max elements. @@ -841,6 +872,10 @@ def _get_trimming_maximums(rn, cn, max_elements, scaling_factor=0.8): The number of input rows / columns max_elements : int The number of allowable elements + max_rows, max_cols : int, optional + Directly specify an initial maximum rows or columns before compression. + scaling_factor : float + Factor at which to reduce the number of rows / columns to fit. 
Returns ------- @@ -854,6 +889,11 @@ def scale_down(rn, cn): else: return int(rn * scaling_factor), cn + if max_rows: + rn = max_rows if rn > max_rows else rn + if max_cols: + cn = max_cols if cn > max_cols else cn + while rn * cn > max_elements: rn, cn = scale_down(rn, cn) diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 9898f4a598bb7..d0b7e288332e2 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -452,3 +452,16 @@ def test_applymap_header_cell_ids(styler, index, columns): 'A' in result ) is columns assert ("#T__level0_col0 {\n attr: val;\n}" in result) is columns + + +@pytest.mark.parametrize("rows", [True, False]) +@pytest.mark.parametrize("cols", [True, False]) +def test_maximums(styler_mi, rows, cols): + result = styler_mi.to_html( + max_rows=2 if rows else None, + max_columns=2 if cols else None, + ) + + assert ">5" in result # [[0,1], [4,5]] always visible + assert (">8" in result) is not rows # first trimmed vertical element + assert (">2" in result) is not cols # first trimmed horizontal element diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index e5c3d9ae14bdd..295a07049c280 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -150,28 +150,56 @@ def test_mi_styler_sparsify_options(mi_styler): assert html1 != html2 -def test_trimming_maximum(): - rn, cn = _get_trimming_maximums(100, 100, 100, scaling_factor=0.5) - assert (rn, cn) == (12, 6) - - rn, cn = _get_trimming_maximums(1000, 3, 750, scaling_factor=0.5) - assert (rn, cn) == (250, 3) +@pytest.mark.parametrize( + "rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn", + [ + (100, 100, 100, None, None, 12, 6), # reduce to (12, 6) < 100 elements + (1000, 3, 750, None, None, 250, 3), # dynamically reduce rows to 250, keep cols + (4, 1000, 500, None, None, 4, 125), # dynamically 
reduce cols to 125, keep rows + (1000, 3, 750, 10, None, 10, 3), # overwrite above dynamics with max_row + (4, 1000, 500, None, 5, 4, 5), # overwrite above dynamics with max_col + (100, 100, 700, 50, 50, 25, 25), # rows cols below given maxes so < 700 elmts + ], +) +def test_trimming_maximum(rn, cn, max_els, max_rows, max_cols, exp_rn, exp_cn): + rn, cn = _get_trimming_maximums( + rn, cn, max_els, max_rows, max_cols, scaling_factor=0.5 + ) + assert (rn, cn) == (exp_rn, exp_cn) -def test_render_trimming(): +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_rows", 3), + ], +) +def test_render_trimming_rows(option, val): + # test auto and specific trimming of rows df = DataFrame(np.arange(120).reshape(60, 2)) - with pd.option_context("styler.render.max_elements", 6): + with pd.option_context(option, val): ctx = df.style._translate(True, True) assert len(ctx["head"][0]) == 3 # index + 2 data cols assert len(ctx["body"]) == 4 # 3 data rows + trimming row assert len(ctx["body"][0]) == 3 # index + 2 data cols - df = DataFrame(np.arange(120).reshape(12, 10)) - with pd.option_context("styler.render.max_elements", 6): + +@pytest.mark.parametrize( + "option, val", + [ + ("styler.render.max_elements", 6), + ("styler.render.max_columns", 2), + ], +) +def test_render_trimming_cols(option, val): + # test auto and specific trimming of cols + df = DataFrame(np.arange(30).reshape(3, 10)) + with pd.option_context(option, val): ctx = df.style._translate(True, True) - assert len(ctx["head"][0]) == 4 # index + 2 data cols + trimming row - assert len(ctx["body"]) == 4 # 3 data rows + trimming row - assert len(ctx["body"][0]) == 4 # index + 2 data cols + trimming row + assert len(ctx["head"][0]) == 4 # index + 2 data cols + trimming col + assert len(ctx["body"]) == 3 # 3 data rows + assert len(ctx["body"][0]) == 4 # index + 2 data cols + trimming col def test_render_trimming_mi(): diff --git a/pandas/tests/io/formats/test_format.py 
b/pandas/tests/io/formats/test_format.py index 500f8bf5ff159..95da68510be6b 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -542,26 +542,26 @@ def test_auto_detect(self): index = range(10) df = DataFrame(index=index, columns=cols) with option_context("mode.sim_interactive", True): - with option_context("max_rows", None): - with option_context("max_columns", None): + with option_context("display.max_rows", None): + with option_context("display.max_columns", None): # Wrap around with None assert has_expanded_repr(df) - with option_context("max_rows", 0): - with option_context("max_columns", 0): + with option_context("display.max_rows", 0): + with option_context("display.max_columns", 0): # Truncate with auto detection. assert has_horizontally_truncated_repr(df) index = range(int(term_height * fac)) df = DataFrame(index=index, columns=cols) - with option_context("max_rows", 0): - with option_context("max_columns", None): + with option_context("display.max_rows", 0): + with option_context("display.max_columns", None): # Wrap around with None assert has_expanded_repr(df) # Truncate vertically assert has_vertically_truncated_repr(df) - with option_context("max_rows", None): - with option_context("max_columns", 0): + with option_context("display.max_rows", None): + with option_context("display.max_columns", 0): assert has_horizontally_truncated_repr(df) def test_to_string_repr_unicode(self): diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 0d5c3bc21c609..555342dd39005 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -169,7 +169,7 @@ def test_repr_should_return_str(self): def test_repr_max_rows(self): # GH 6863 - with option_context("max_rows", None): + with option_context("display.max_rows", None): str(Series(range(1001))) # should not raise exception def test_unicode_string_with_unicode(self):