Skip to content

Commit 96c7ab5

Browse files
jorisvandenbosschejreback
authored andcommitted
Shorter truncated Series/DataFrame repr: introduce min_rows (#27095)
1 parent 23099f7 commit 96c7ab5

File tree

7 files changed

+145
-10
lines changed

7 files changed

+145
-10
lines changed

doc/source/user_guide/options.rst

+21-1
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,22 @@ lines are replaced by an ellipsis.
157157
df
158158
pd.reset_option('max_rows')
159159
160+
Once the ``display.max_rows`` is exceeded, the ``display.min_rows`` option
161+
determines how many rows are shown in the truncated repr.
162+
163+
.. ipython:: python
164+
165+
pd.set_option('max_rows', 8)
166+
pd.set_option('min_rows', 4)
167+
# below max_rows -> all rows shown
168+
df = pd.DataFrame(np.random.randn(7, 2))
169+
df
170+
# above max_rows -> only min_rows (4) rows shown
171+
df = pd.DataFrame(np.random.randn(9, 2))
172+
df
173+
pd.reset_option('max_rows')
174+
pd.reset_option('min_rows')
175+
160176
``display.expand_frame_repr`` allows for the representation of
161177
dataframes to stretch across pages, wrapped over the full column vs row-wise.
162178

@@ -352,8 +368,12 @@ display.max_rows 60 This sets the maximum numbe
352368
out various output. For example,
353369
this value determines whether the
354370
repr() for a dataframe prints out
355-
fully or just a summary repr.
371+
fully or just a truncated or summary repr.
356372
'None' value means unlimited.
373+
display.min_rows 10 The number of rows to show in a truncated
374+
repr (when `max_rows` is exceeded). Ignored
375+
when `max_rows` is set to None or 0. When set
376+
to None, follows the value of `max_rows`.
357377
display.max_seq_items 100 when pretty-printing a long sequence,
358378
no more than `max_seq_items` will
359379
be printed. If items are omitted,

doc/source/whatsnew/v0.25.0.rst

+24
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,30 @@ than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally,
134134
the output will truncate, if it's wider than :attr:`options.display.width`
135135
(default: 80 characters).
136136

137+
.. _whatsnew_0250.enhancements.shorter_truncated_repr:
138+
139+
Shorter truncated repr for Series and DataFrame
140+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
141+
142+
Currently, the default display options of pandas ensure that when a Series
143+
or DataFrame has more than 60 rows, its repr gets truncated to this maximum
144+
of 60 rows (the ``display.max_rows`` option). However, this still gives
145+
a repr that takes up a large part of the vertical screen real estate. Therefore,
146+
a new option ``display.min_rows`` is introduced with a default of 10 which
147+
determines the number of rows shown in the truncated repr:
148+
149+
- For small Series or DataFrames, up to ``max_rows`` number of rows is shown
150+
(default: 60).
151+
- For larger Series or DataFrames with a length above ``max_rows``, only
152+
``min_rows`` number of rows is shown (default: 10, i.e. the first and last
153+
5 rows).
154+
155+
This dual option makes it possible to still see the full content of relatively small
156+
objects (e.g. ``df.head(20)`` shows all 20 rows), while giving a brief repr
157+
for large objects.
158+
159+
To restore the previous behaviour of a single threshold, set
160+
``pd.options.display.min_rows = None``.
137161

138162
.. _whatsnew_0250.enhancements.json_normalize_with_max_level:
139163

pandas/core/config_init.py

+9
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ def use_numexpr_cb(key):
7777
correct auto-detection.
7878
"""
7979

80+
pc_min_rows_doc = """
81+
: int
82+
The numbers of rows to show in a truncated view (when `max_rows` is
83+
exceeded). Ignored when `max_rows` is set to None or 0. When set to
84+
None, follows the value of `max_rows`.
85+
"""
86+
8087
pc_max_cols_doc = """
8188
: int
8289
If max_cols is exceeded, switch to truncate view. Depending on
@@ -306,6 +313,8 @@ def is_terminal():
306313
validator=is_instance_factory((int, type(None))))
307314
cf.register_option('max_rows', 60, pc_max_rows_doc,
308315
validator=is_instance_factory([type(None), int]))
316+
cf.register_option('min_rows', 10, pc_min_rows_doc,
317+
validator=is_instance_factory([type(None), int]))
309318
cf.register_option('max_categories', 8, pc_max_categories_doc,
310319
validator=is_int)
311320
cf.register_option('max_colwidth', 50, max_colwidth_doc, validator=is_int)

pandas/core/frame.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -588,14 +588,16 @@ def __repr__(self):
588588
return buf.getvalue()
589589

590590
max_rows = get_option("display.max_rows")
591+
min_rows = get_option("display.min_rows")
591592
max_cols = get_option("display.max_columns")
592593
show_dimensions = get_option("display.show_dimensions")
593594
if get_option("display.expand_frame_repr"):
594595
width, _ = console.get_console_size()
595596
else:
596597
width = None
597-
self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
598-
line_width=width, show_dimensions=show_dimensions)
598+
self.to_string(buf=buf, max_rows=max_rows, min_rows=min_rows,
599+
max_cols=max_cols, line_width=width,
600+
show_dimensions=show_dimensions)
599601

600602
return buf.getvalue()
601603

@@ -633,8 +635,8 @@ def _repr_html_(self):
633635
def to_string(self, buf=None, columns=None, col_space=None, header=True,
634636
index=True, na_rep='NaN', formatters=None, float_format=None,
635637
sparsify=None, index_names=True, justify=None,
636-
max_rows=None, max_cols=None, show_dimensions=False,
637-
decimal='.', line_width=None):
638+
max_rows=None, min_rows=None, max_cols=None,
639+
show_dimensions=False, decimal='.', line_width=None):
638640
"""
639641
Render a DataFrame to a console-friendly tabular output.
640642
%(shared_params)s
@@ -663,6 +665,7 @@ def to_string(self, buf=None, columns=None, col_space=None, header=True,
663665
sparsify=sparsify, justify=justify,
664666
index_names=index_names,
665667
header=header, index=index,
668+
min_rows=min_rows,
666669
max_rows=max_rows,
667670
max_cols=max_cols,
668671
show_dimensions=show_dimensions,

pandas/core/series.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -1505,17 +1505,20 @@ def __repr__(self):
15051505
width, height = get_terminal_size()
15061506
max_rows = (height if get_option("display.max_rows") == 0 else
15071507
get_option("display.max_rows"))
1508+
min_rows = (height if get_option("display.max_rows") == 0 else
1509+
get_option("display.min_rows"))
15081510
show_dimensions = get_option("display.show_dimensions")
15091511

15101512
self.to_string(buf=buf, name=self.name, dtype=self.dtype,
1511-
max_rows=max_rows, length=show_dimensions)
1513+
min_rows=min_rows, max_rows=max_rows,
1514+
length=show_dimensions)
15121515
result = buf.getvalue()
15131516

15141517
return result
15151518

15161519
def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
15171520
index=True, length=False, dtype=False, name=False,
1518-
max_rows=None):
1521+
max_rows=None, min_rows=None):
15191522
"""
15201523
Render a string representation of the Series.
15211524
@@ -1541,6 +1544,9 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
15411544
max_rows : int, optional
15421545
Maximum number of rows to show before truncating. If None, show
15431546
all.
1547+
min_rows : int, optional
1548+
The number of rows to display in a truncated repr (when number
1549+
of rows is above `max_rows`).
15441550
15451551
Returns
15461552
-------
@@ -1552,6 +1558,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True,
15521558
header=header, index=index,
15531559
dtype=dtype, na_rep=na_rep,
15541560
float_format=float_format,
1561+
min_rows=min_rows,
15551562
max_rows=max_rows)
15561563
result = formatter.to_string()
15571564

pandas/io/formats/format.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@
7979
* unset.
8080
max_rows : int, optional
8181
Maximum number of rows to display in the console.
82+
min_rows : int, optional
83+
The number of rows to display in the console in a truncated repr
84+
(when number of rows is above `max_rows`).
8285
max_cols : int, optional
8386
Maximum number of columns to display in the console.
8487
show_dimensions : bool, default False
@@ -159,7 +162,7 @@ class SeriesFormatter:
159162

160163
def __init__(self, series, buf=None, length=True, header=True, index=True,
161164
na_rep='NaN', name=False, float_format=None, dtype=True,
162-
max_rows=None):
165+
max_rows=None, min_rows=None):
163166
self.series = series
164167
self.buf = buf if buf is not None else StringIO()
165168
self.name = name
@@ -168,6 +171,7 @@ def __init__(self, series, buf=None, length=True, header=True, index=True,
168171
self.length = length
169172
self.index = index
170173
self.max_rows = max_rows
174+
self.min_rows = min_rows
171175

172176
if float_format is None:
173177
float_format = get_option("display.float_format")
@@ -179,10 +183,17 @@ def __init__(self, series, buf=None, length=True, header=True, index=True,
179183

180184
def _chk_truncate(self):
181185
from pandas.core.reshape.concat import concat
186+
min_rows = self.min_rows
182187
max_rows = self.max_rows
188+
# truncation determined by max_rows, actual truncated number of rows
189+
# used below by min_rows
183190
truncate_v = max_rows and (len(self.series) > max_rows)
184191
series = self.series
185192
if truncate_v:
193+
if min_rows:
194+
# if min_rows is set (not None or 0), set max_rows to minimum
195+
# of both
196+
max_rows = min(min_rows, max_rows)
186197
if max_rows == 1:
187198
row_num = max_rows
188199
series = series.iloc[:max_rows]
@@ -391,8 +402,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
391402
header=True, index=True, na_rep='NaN', formatters=None,
392403
justify=None, float_format=None, sparsify=None,
393404
index_names=True, line_width=None, max_rows=None,
394-
max_cols=None, show_dimensions=False, decimal='.',
395-
table_id=None, render_links=False, **kwds):
405+
min_rows=None, max_cols=None, show_dimensions=False,
406+
decimal='.', table_id=None, render_links=False, **kwds):
396407
self.frame = frame
397408
if buf is not None:
398409
self.buf = _expand_user(_stringify_path(buf))
@@ -414,6 +425,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
414425
self.index = index
415426
self.line_width = line_width
416427
self.max_rows = max_rows
428+
self.min_rows = min_rows
417429
self.max_cols = max_cols
418430
self.max_rows_displayed = min(max_rows or len(self.frame),
419431
len(self.frame))
@@ -471,6 +483,10 @@ def _chk_truncate(self):
471483
max_rows = h
472484

473485
if not hasattr(self, 'max_rows_adj'):
486+
if max_rows:
487+
if (len(self.frame) > max_rows) and self.min_rows:
488+
# if truncated, set max_rows showed to min_rows
489+
max_rows = min(self.min_rows, max_rows)
474490
self.max_rows_adj = max_rows
475491
if not hasattr(self, 'max_cols_adj'):
476492
self.max_cols_adj = max_cols

pandas/tests/io/formats/test_format.py

+56
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,34 @@ def mkframe(n):
377377
printing.pprint_thing(df._repr_fits_horizontal_())
378378
assert has_expanded_repr(df)
379379

380+
def test_repr_min_rows(self):
381+
df = pd.DataFrame({'a': range(20)})
382+
383+
# default setting no truncation even if above min_rows
384+
assert '..' not in repr(df)
385+
386+
df = pd.DataFrame({'a': range(61)})
387+
388+
# default of max_rows 60 triggers truncation if above
389+
assert '..' in repr(df)
390+
391+
with option_context('display.max_rows', 10, 'display.min_rows', 4):
392+
# truncated after first two rows
393+
assert '..' in repr(df)
394+
assert '2 ' not in repr(df)
395+
396+
with option_context('display.max_rows', 12, 'display.min_rows', None):
397+
# when set to None, follow value of max_rows
398+
assert '5 5' in repr(df)
399+
400+
with option_context('display.max_rows', 10, 'display.min_rows', 12):
401+
# when set value higher as max_rows, use the minimum
402+
assert '5 5' not in repr(df)
403+
404+
with option_context('display.max_rows', None, 'display.min_rows', 12):
405+
# max_rows of None -> never truncate
406+
assert '..' not in repr(df)
407+
380408
def test_str_max_colwidth(self):
381409
# GH 7856
382410
df = pd.DataFrame([{'a': 'foo',
@@ -2284,6 +2312,34 @@ def test_show_dimensions(self):
22842312
"display.show_dimensions", False):
22852313
assert 'Length' not in repr(s)
22862314

2315+
def test_repr_min_rows(self):
2316+
s = pd.Series(range(20))
2317+
2318+
# default setting no truncation even if above min_rows
2319+
assert '..' not in repr(s)
2320+
2321+
s = pd.Series(range(61))
2322+
2323+
# default of max_rows 60 triggers truncation if above
2324+
assert '..' in repr(s)
2325+
2326+
with option_context('display.max_rows', 10, 'display.min_rows', 4):
2327+
# truncated after first two rows
2328+
assert '..' in repr(s)
2329+
assert '2 ' not in repr(s)
2330+
2331+
with option_context('display.max_rows', 12, 'display.min_rows', None):
2332+
# when set to None, follow value of max_rows
2333+
assert '5 5' in repr(s)
2334+
2335+
with option_context('display.max_rows', 10, 'display.min_rows', 12):
2336+
# when set value higher as max_rows, use the minimum
2337+
assert '5 5' not in repr(s)
2338+
2339+
with option_context('display.max_rows', None, 'display.min_rows', 12):
2340+
# max_rows of None -> never truncate
2341+
assert '..' not in repr(s)
2342+
22872343
def test_to_string_name(self):
22882344
s = Series(range(100), dtype='int64')
22892345
s.name = 'myser'

0 commit comments

Comments
 (0)