diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index eb4b72d01d59a..17efbbcc927d3 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -294,6 +294,34 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). - +Output Formatting Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :func:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`) + +.. ipython:: python + + df = pd.DataFrame({ + 'int_col': [1, 2, 3, 4, 5], + 'text_col': ['alpha', 'beta', 'gamma', 'delta', 'epsilon'], + 'float_col': [0.0, 0.25, 0.5, 0.75, 1.0]}) + df.info() + +Previous Behavior: + +.. code-block:: python + + In [1]: df.info() + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + int_col 5 non-null int64 + text_col 5 non-null object + float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 200.0+ bytes + + Other ^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16f34fee5e1ff..b68188d9c1137 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2332,9 +2332,11 @@ def info( RangeIndex: 5 entries, 0 to 4 Data columns (total 3 columns): - int_col 5 non-null int64 - text_col 5 non-null object - float_col 5 non-null float64 + #. Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 248.0+ bytes @@ -2373,9 +2375,11 @@ def info( RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): - column_1 1000000 non-null object - column_2 1000000 non-null object - column_3 1000000 non-null object + #. Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 22.9+ MB @@ -2383,9 +2387,11 @@ def info( RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): - column_1 1000000 non-null object - column_2 1000000 non-null object - column_3 1000000 non-null object + #. Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 188.8 MB """ @@ -2404,48 +2410,61 @@ def info( return cols = self.columns + cols_count = len(cols) # hack if max_cols is None: - max_cols = get_option("display.max_info_columns", len(self.columns) + 1) + max_cols = get_option('display.max_info_columns', cols_count + 1) max_rows = get_option("display.max_info_rows", len(self) + 1) if null_counts is None: - show_counts = (len(self.columns) <= max_cols) and (len(self) < max_rows) + show_counts = (cols_count <= max_cols) and (len(self) < max_rows) else: show_counts = null_counts - exceeds_info_cols = len(self.columns) > max_cols + exceeds_info_cols = cols_count > max_cols def _verbose_repr(): - lines.append("Data columns (total %d columns):" % len(self.columns)) - space = max(len(pprint_thing(k)) for k in self.columns) + 4 + lines.append('Data columns (total ' + '{count} columns):'.format(count=cols_count)) + space = max(len(pprint_thing(k)) for k in cols) + len_column = len(pprint_thing('Column')) + space = max(space, len_column) + 4 + space_num = len(pprint_thing(cols_count)) + len_id = len(pprint_thing(' #.')) + space_num = max(space_num, len_id) + 2 counts = None - tmpl = "{count}{dtype}" + header = _put_str(' #.', space_num) + _put_str('Column', space) if show_counts: counts = self.count() if len(cols) != len(counts): # pragma: no cover raise AssertionError( - "Columns must equal counts " - "({cols:d} != {counts:d})".format( - cols=len(cols), counts=len(counts) - ) - ) - tmpl = "{count} non-null {dtype}" - + '({cols_count} != {count})'.format( + cols_count=cols_count, count=len(counts))) + col_header = 'Non-Null Count & Dtype' + tmpl = '{count} non-null {dtype}' + else: + col_header = 'Dtype' + tmpl = '{count}{dtype}' + header += col_header + + lines.append(header) + lines.append(_put_str('-' * len_id, space_num) + + _put_str('-' * len_column, space) + + '-' * len(pprint_thing(col_header))) dtypes = self.dtypes - for i, col in enumerate(self.columns): + for i, col in enumerate(cols): dtype = dtypes.iloc[i] col = pprint_thing(col) + line_no = _put_str(' {num}'.format(num=i), space_num) count = "" if show_counts: count = counts.iloc[i] - lines.append( - _put_str(col, space) + tmpl.format(count=count, dtype=dtype) - ) + lines.append(line_no + _put_str(col, space) + + tmpl.format(count=count, dtype=dtype)) def _non_verbose_repr(): lines.append(self.columns._summary(name="Columns")) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 48f42b5f101ce..5c51fb4f2bead 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -219,7 +219,9 @@ def test_info_memory(self): RangeIndex: 2 entries, 0 to 1 Data columns (total 1 columns): - a 2 non-null int64 + #. Column Non-Null Count & Dtype + --- ------ ---------------------- + 0 a 2 non-null int64 dtypes: int64(1) memory usage: {} bytes """.format( @@ -229,6 +231,26 @@ def test_info_memory(self): assert result == expected + def test_info_without_null_counts(self): + df = pd.DataFrame({'a': [1, 2]}) + buf = StringIO() + df.info(buf=buf, null_counts=False) + buf.seek(0) + lines = buf.readlines() + result = ''.join(lines[:-1]) + expected = textwrap.dedent('''\ + + RangeIndex: 2 entries, 0 to 1 + Data columns (total 1 columns): + #. Column Dtype + --- ------ ----- + 0 a int64 + dtypes: int64(1) + ''') + assert result == expected + + + def test_info_wide(self): from pandas import set_option, reset_option @@ -263,8 +285,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self): frame.info(buf=io) io.seek(0) lines = io.readlines() - assert "a 1 non-null int64\n" == lines[3] - assert "a 1 non-null float64\n" == lines[4] + assert ' 0 a 1 non-null int64\n' == lines[5] + assert ' 1 a 1 non-null float64\n' == lines[6] def test_info_shows_column_dtypes(self): dtypes = [ @@ -285,12 +307,12 @@ def test_info_shows_column_dtypes(self): df.info(buf=buf) res = buf.getvalue() for i, dtype in enumerate(dtypes): - name = "{i:d} {n:d} non-null {dtype}".format(i=i, n=n, dtype=dtype) + name = '%s %d non-null %s' % (i, n, dtype) assert name in res def test_info_max_cols(self): df = DataFrame(np.random.randn(10, 5)) - for len_, verbose in [(5, None), (5, False), (10, True)]: + for len_, verbose in [(5, None), (5, False), (12, True)]: # For verbose always ^ setting ^ summarize ^ full output with option_context("max_info_columns", 4): buf = StringIO() @@ -298,7 +320,7 @@ def test_info_max_cols(self): res = buf.getvalue() assert len(res.strip().split("\n")) == len_ - for len_, verbose in [(10, None), (5, False), (10, True)]: + for len_, verbose in [(10, None), (5, False), (12, True)]: # max_cols no exceeded with option_context("max_info_columns", 5): @@ -307,7 +329,7 @@ def test_info_max_cols(self): res = buf.getvalue() assert len(res.strip().split("\n")) == len_ - for len_, max_cols in [(10, 5), (5, 4)]: + for len_, max_cols in [(12, 5), (5, 4)]: # setting truncates with option_context("max_info_columns", 4): buf = StringIO()