Skip to content

Commit 1e32421

Browse files
Roymprog jreback
authored and committed
ENH: show numbers on .info() with verbose flag (#28876)
1 parent a712191 commit 1e32421

File tree

3 files changed

+133
-26
lines changed

3 files changed

+133
-26
lines changed

doc/source/whatsnew/v1.0.0.rst

+30-1
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,42 @@ New repr for :class:`~pandas.arrays.IntervalArray`
290290
closed='right',
291291
dtype='interval[int64]')
292292
293-
294293
*pandas 1.0.0*
295294

296295
.. ipython:: python
297296
298297
pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)])
299298
299+
Extended verbose info output for :class:`~pandas.DataFrame`
300+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
301+
302+
- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`)
303+
304+
*pandas 0.25.x*
305+
306+
.. code-block:: python
307+
308+
>>> df = pd.DataFrame({"int_col": [1, 2, 3],
309+
... "text_col": ["a", "b", "c"],
310+
... "float_col": [0.0, 0.1, 0.2]})
311+
>>> df.info(verbose=True)
312+
<class 'pandas.core.frame.DataFrame'>
313+
RangeIndex: 3 entries, 0 to 2
314+
Data columns (total 3 columns):
315+
int_col 3 non-null int64
316+
text_col 3 non-null object
317+
float_col 3 non-null float64
318+
dtypes: float64(1), int64(1), object(1)
319+
memory usage: 152.0+ bytes
320+
321+
*pandas 1.0.0*
322+
323+
.. ipython:: python
324+
325+
df = pd.DataFrame({"int_col": [1, 2, 3],
326+
"text_col": ["a", "b", "c"],
327+
"float_col": [0.0, 0.1, 0.2]})
328+
df.info(verbose=True)
300329
301330
:meth:`pandas.array` inference changes
302331
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

pandas/core/frame.py

+64-17
Original file line numberDiff line numberDiff line change
@@ -2278,9 +2278,11 @@ def info(
22782278
<class 'pandas.core.frame.DataFrame'>
22792279
RangeIndex: 5 entries, 0 to 4
22802280
Data columns (total 3 columns):
2281-
int_col 5 non-null int64
2282-
text_col 5 non-null object
2283-
float_col 5 non-null float64
2281+
# Column Non-Null Count Dtype
2282+
--- ------ -------------- -----
2283+
0 int_col 5 non-null int64
2284+
1 text_col 5 non-null object
2285+
2 float_col 5 non-null float64
22842286
dtypes: float64(1), int64(1), object(1)
22852287
memory usage: 248.0+ bytes
22862288
@@ -2319,19 +2321,23 @@ def info(
23192321
<class 'pandas.core.frame.DataFrame'>
23202322
RangeIndex: 1000000 entries, 0 to 999999
23212323
Data columns (total 3 columns):
2322-
column_1 1000000 non-null object
2323-
column_2 1000000 non-null object
2324-
column_3 1000000 non-null object
2324+
# Column Non-Null Count Dtype
2325+
--- ------ -------------- -----
2326+
0 column_1 1000000 non-null object
2327+
1 column_2 1000000 non-null object
2328+
2 column_3 1000000 non-null object
23252329
dtypes: object(3)
23262330
memory usage: 22.9+ MB
23272331
23282332
>>> df.info(memory_usage='deep')
23292333
<class 'pandas.core.frame.DataFrame'>
23302334
RangeIndex: 1000000 entries, 0 to 999999
23312335
Data columns (total 3 columns):
2332-
column_1 1000000 non-null object
2333-
column_2 1000000 non-null object
2334-
column_3 1000000 non-null object
2336+
# Column Non-Null Count Dtype
2337+
--- ------ -------------- -----
2338+
0 column_1 1000000 non-null object
2339+
1 column_2 1000000 non-null object
2340+
2 column_3 1000000 non-null object
23352341
dtypes: object(3)
23362342
memory usage: 188.8 MB
23372343
"""
@@ -2350,6 +2356,7 @@ def info(
23502356
return
23512357

23522358
cols = self.columns
2359+
col_count = len(self.columns)
23532360

23542361
# hack
23552362
if max_cols is None:
@@ -2358,36 +2365,76 @@ def info(
23582365
max_rows = get_option("display.max_info_rows", len(self) + 1)
23592366

23602367
if null_counts is None:
2361-
show_counts = (len(self.columns) <= max_cols) and (len(self) < max_rows)
2368+
show_counts = (col_count <= max_cols) and (len(self) < max_rows)
23622369
else:
23632370
show_counts = null_counts
2364-
exceeds_info_cols = len(self.columns) > max_cols
2371+
exceeds_info_cols = col_count > max_cols
23652372

23662373
def _verbose_repr():
23672374
lines.append(f"Data columns (total {len(self.columns)} columns):")
2368-
space = max(len(pprint_thing(k)) for k in self.columns) + 4
2375+
2376+
id_head = " # "
2377+
column_head = "Column"
2378+
col_space = 2
2379+
2380+
max_col = max(len(pprint_thing(k)) for k in cols)
2381+
len_column = len(pprint_thing(column_head))
2382+
space = max(max_col, len_column) + col_space
2383+
2384+
max_id = len(pprint_thing(col_count))
2385+
len_id = len(pprint_thing(id_head))
2386+
space_num = max(max_id, len_id) + col_space
23692387
counts = None
23702388

2371-
tmpl = "{count}{dtype}"
2389+
header = _put_str(id_head, space_num) + _put_str(column_head, space)
23722390
if show_counts:
23732391
counts = self.count()
23742392
if len(cols) != len(counts): # pragma: no cover
23752393
raise AssertionError(
23762394
f"Columns must equal counts ({len(cols)} != {len(counts)})"
23772395
)
2378-
tmpl = "{count} non-null {dtype}"
2396+
count_header = "Non-Null Count"
2397+
len_count = len(count_header)
2398+
non_null = " non-null"
2399+
max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null)
2400+
space_count = max(len_count, max_count) + col_space
2401+
count_temp = "{count}" + non_null
2402+
else:
2403+
count_header = ""
2404+
space_count = len(count_header)
2405+
len_count = space_count
2406+
count_temp = "{count}"
2407+
2408+
dtype_header = "Dtype"
2409+
len_dtype = len(dtype_header)
2410+
max_dtypes = max(len(pprint_thing(k)) for k in self.dtypes)
2411+
space_dtype = max(len_dtype, max_dtypes)
2412+
header += _put_str(count_header, space_count) + _put_str(
2413+
dtype_header, space_dtype
2414+
)
2415+
2416+
lines.append(header)
2417+
lines.append(
2418+
_put_str("-" * len_id, space_num)
2419+
+ _put_str("-" * len_column, space)
2420+
+ _put_str("-" * len_count, space_count)
2421+
+ _put_str("-" * len_dtype, space_dtype)
2422+
)
23792423

2380-
dtypes = self.dtypes
23812424
for i, col in enumerate(self.columns):
2382-
dtype = dtypes.iloc[i]
2425+
dtype = self.dtypes.iloc[i]
23832426
col = pprint_thing(col)
23842427

2428+
line_no = _put_str(" {num}".format(num=i), space_num)
23852429
count = ""
23862430
if show_counts:
23872431
count = counts.iloc[i]
23882432

23892433
lines.append(
2390-
_put_str(col, space) + tmpl.format(count=count, dtype=dtype)
2434+
line_no
2435+
+ _put_str(col, space)
2436+
+ _put_str(count_temp.format(count=count), space_count)
2437+
+ _put_str(dtype, space_dtype)
23912438
)
23922439

23932440
def _non_verbose_repr():

pandas/tests/frame/test_repr_info.py

+39-8
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,28 @@ def test_info(self, float_frame, datetime_frame):
205205
frame.info()
206206
frame.info(verbose=False)
207207

208+
def test_info_verbose(self):
209+
buf = StringIO()
210+
size = 1001
211+
start = 5
212+
frame = DataFrame(np.random.randn(3, size))
213+
frame.info(verbose=True, buf=buf)
214+
215+
res = buf.getvalue()
216+
header = " # Column Dtype \n--- ------ ----- "
217+
assert header in res
218+
219+
frame.info(verbose=True, buf=buf)
220+
buf.seek(0)
221+
lines = buf.readlines()
222+
assert len(lines) > 0
223+
224+
for i, line in enumerate(lines):
225+
if i >= start and i < start + size:
226+
index = i - start
227+
line_nr = " {} ".format(index)
228+
assert line.startswith(line_nr)
229+
208230
def test_info_memory(self):
209231
# https://github.com/pandas-dev/pandas/issues/21056
210232
df = pd.DataFrame({"a": pd.Series([1, 2], dtype="i8")})
@@ -218,7 +240,9 @@ def test_info_memory(self):
218240
<class 'pandas.core.frame.DataFrame'>
219241
RangeIndex: 2 entries, 0 to 1
220242
Data columns (total 1 columns):
221-
a 2 non-null int64
243+
# Column Non-Null Count Dtype
244+
--- ------ -------------- -----
245+
0 a 2 non-null int64
222246
dtypes: int64(1)
223247
memory usage: {} bytes
224248
""".format(
@@ -262,8 +286,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
262286
frame.info(buf=io)
263287
io.seek(0)
264288
lines = io.readlines()
265-
assert "a 1 non-null int64\n" == lines[3]
266-
assert "a 1 non-null float64\n" == lines[4]
289+
assert " 0 a 1 non-null int64 \n" == lines[5]
290+
assert " 1 a 1 non-null float64\n" == lines[6]
267291

268292
def test_info_shows_column_dtypes(self):
269293
dtypes = [
@@ -283,30 +307,37 @@ def test_info_shows_column_dtypes(self):
283307
buf = StringIO()
284308
df.info(buf=buf)
285309
res = buf.getvalue()
310+
header = (
311+
" # Column Non-Null Count Dtype \n"
312+
"--- ------ -------------- ----- "
313+
)
314+
assert header in res
286315
for i, dtype in enumerate(dtypes):
287-
name = "{i:d} {n:d} non-null {dtype}".format(i=i, n=n, dtype=dtype)
316+
name = " {i:d} {i:d} {n:d} non-null {dtype}".format(
317+
i=i, n=n, dtype=dtype
318+
)
288319
assert name in res
289320

290321
def test_info_max_cols(self):
291322
df = DataFrame(np.random.randn(10, 5))
292-
for len_, verbose in [(5, None), (5, False), (10, True)]:
323+
for len_, verbose in [(5, None), (5, False), (12, True)]:
293324
# For verbose always ^ setting ^ summarize ^ full output
294325
with option_context("max_info_columns", 4):
295326
buf = StringIO()
296327
df.info(buf=buf, verbose=verbose)
297328
res = buf.getvalue()
298329
assert len(res.strip().split("\n")) == len_
299330

300-
for len_, verbose in [(10, None), (5, False), (10, True)]:
331+
for len_, verbose in [(12, None), (5, False), (12, True)]:
301332

302-
# max_cols no exceeded
333+
# max_cols not exceeded
303334
with option_context("max_info_columns", 5):
304335
buf = StringIO()
305336
df.info(buf=buf, verbose=verbose)
306337
res = buf.getvalue()
307338
assert len(res.strip().split("\n")) == len_
308339

309-
for len_, max_cols in [(10, 5), (5, 4)]:
340+
for len_, max_cols in [(12, 5), (5, 4)]:
310341
# setting truncates
311342
with option_context("max_info_columns", 4):
312343
buf = StringIO()

0 commit comments

Comments
 (0)