Skip to content

Commit a7d8227

Browse files
author
y-p
committed
ENH: revamp null count suppression for large frames in df.info()
1 parent a1fff60 commit a7d8227

File tree

3 files changed

+26
-15
lines changed

3 files changed

+26
-15
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Improvements to existing features
7272
- perf improvements in Series datetime/timedelta binary operations (:issue:`5801`)
7373
- `option_context` context manager now available as top-level API (:issue:`5752`)
7474
- df.info() view now displays dtype info per column (:issue:`5682`)
75+
- df.info() now honors the ``max_info_rows`` option, disabling null counts for large frames (:issue:`5974`)
7576
- perf improvements in DataFrame ``count/dropna`` for ``axis=1``
7677
- Series.str.contains now has a `regex=False` keyword which can be faster for plain (non-regex) string patterns. (:issue:`5879`)
7778
- support ``dtypes`` on ``Panel``

pandas/core/config_init.py

+3-8
Original file line numberDiff line numberDiff line change
@@ -166,11 +166,9 @@
166166

167167
pc_max_info_rows_doc = """
168168
: int or None
169-
Deprecated.
170-
"""
171-
172-
pc_max_info_rows_deprecation_warning = """\
173-
max_info_rows has been deprecated, as reprs no longer use the info view.
169+
df.info() will usually show null-counts for each column.
170+
For large frames this can be quite slow. max_info_rows and max_info_columns
171+
limit this null check only to frames with smaller dimensions than specified.
174172
"""
175173

176174
pc_large_repr_doc = """
@@ -266,9 +264,6 @@ def mpl_style_cb(key):
266264
msg=pc_height_deprecation_warning,
267265
rkey='display.max_rows')
268266

269-
cf.deprecate_option('display.max_info_rows',
270-
msg=pc_max_info_rows_deprecation_warning)
271-
272267
tc_sim_interactive_doc = """
273268
: boolean
274269
Whether to simulate interactive mode for purposes of testing

pandas/core/frame.py

+22-7
Original file line numberDiff line numberDiff line change
@@ -1419,20 +1419,35 @@ def info(self, verbose=True, buf=None, max_cols=None):
14191419
max_cols = get_option(
14201420
'display.max_info_columns', len(self.columns) + 1)
14211421

1422-
if verbose and len(self.columns) <= max_cols:
1422+
max_rows = get_option('display.max_info_rows', len(self) + 1)
1423+
1424+
show_counts = ((len(self.columns) <= max_cols) and
1425+
(len(self) < max_rows))
1426+
if verbose:
14231427
lines.append('Data columns (total %d columns):' %
14241428
len(self.columns))
14251429
space = max([len(com.pprint_thing(k)) for k in self.columns]) + 4
1426-
counts = self.count()
1427-
if len(cols) != len(counts): # pragma: no cover
1428-
raise AssertionError('Columns must equal counts (%d != %d)' %
1429-
(len(cols), len(counts)))
1430+
counts = None
1431+
1432+
tmpl = "%s%s"
1433+
if show_counts:
1434+
counts = self.count()
1435+
if len(cols) != len(counts): # pragma: no cover
1436+
raise AssertionError('Columns must equal counts (%d != %d)' %
1437+
(len(cols), len(counts)))
1438+
tmpl = "%s non-null %s"
1439+
14301440
dtypes = self.dtypes
1431-
for col, count in compat.iteritems(counts):
1441+
for i, col in enumerate(self.columns):
14321442
dtype = dtypes[col]
14331443
col = com.pprint_thing(col)
1444+
1445+
count= ""
1446+
if show_counts:
1447+
count = counts[i]
1448+
14341449
lines.append(_put_str(col, space) +
1435-
'%d non-null %s' % (count, dtype))
1450+
tmpl % (count, dtype))
14361451
else:
14371452
lines.append(self.columns.summary(name='Columns'))
14381453

0 commit comments

Comments
 (0)