pandas-dev · Alexandreae · Sep 30, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -294,6 +294,34 @@ ExtensionArray
 - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`).
 -
 
+Output Formatting Enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`)
+
+.. ipython:: python
+
+    df = pd.DataFrame({
+            'int_col': [1, 2, 3, 4, 5],
+            'text_col': ['alpha', 'beta', 'gamma', 'delta', 'epsilon'],
+            'float_col': [0.0, 0.25, 0.5, 0.75, 1.0]})
+    df.info()
+
+Previous Behavior:
+
+.. code-block:: python
+
+    In [1]: df.info()
+    <class 'pandas.core.frame.DataFrame'>
+    RangeIndex: 5 entries, 0 to 4
+    Data columns (total 3 columns):
+    int_col      5 non-null int64
+    text_col     5 non-null object
+    float_col    5 non-null float64
+    dtypes: float64(1), int64(1), object(1)
+    memory usage: 200.0+ bytes
+
+
 
 Other
 ^^^^^

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2332,9 +2332,11 @@ def info(
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 5 entries, 0 to 4
         Data columns (total 3 columns):
-        int_col      5 non-null int64
-        text_col     5 non-null object
-        float_col    5 non-null float64
+         #.  Column       Non-Null Count & Dtype
+        ---  ------       ----------------------
+         0   int_col      5 non-null int64
+         1   text_col     5 non-null object
+         2   float_col    5 non-null float64
         dtypes: float64(1), int64(1), object(1)
         memory usage: 248.0+ bytes
 
@@ -2373,19 +2375,23 @@ def info(
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 1000000 entries, 0 to 999999
         Data columns (total 3 columns):
-        column_1    1000000 non-null object
-        column_2    1000000 non-null object
-        column_3    1000000 non-null object
+         #.  Column      Non-Null Count & Dtype
+        ---  ------      ----------------------
+         0   column_1    1000000 non-null object
+         1   column_2    1000000 non-null object
+         2   column_3    1000000 non-null object
         dtypes: object(3)
         memory usage: 22.9+ MB
 
         >>> df.info(memory_usage='deep')
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 1000000 entries, 0 to 999999
         Data columns (total 3 columns):
-        column_1    1000000 non-null object
-        column_2    1000000 non-null object
-        column_3    1000000 non-null object
+         #.  Column      Non-Null Count & Dtype
+        ---  ------      ----------------------
+         0   column_1    1000000 non-null object
+         1   column_2    1000000 non-null object
+         2   column_3    1000000 non-null object
         dtypes: object(3)
         memory usage: 188.8 MB
         """
@@ -2404,48 +2410,61 @@ def info(
             return
 
         cols = self.columns
+        cols_count = len(cols)
 
         # hack
         if max_cols is None:
-            max_cols = get_option("display.max_info_columns", len(self.columns) + 1)
+            max_cols = get_option('display.max_info_columns', cols_count + 1)
 
         max_rows = get_option("display.max_info_rows", len(self) + 1)
 
         if null_counts is None:
-            show_counts = (len(self.columns) <= max_cols) and (len(self) < max_rows)
+            show_counts = (cols_count <= max_cols) and (len(self) < max_rows)
         else:
             show_counts = null_counts
-        exceeds_info_cols = len(self.columns) > max_cols
+        exceeds_info_cols = cols_count > max_cols
 
         def _verbose_repr():
-            lines.append("Data columns (total %d columns):" % len(self.columns))
-            space = max(len(pprint_thing(k)) for k in self.columns) + 4
+            lines.append('Data columns (total '
+                         '{count} columns):'.format(count=cols_count))
+            space = max(len(pprint_thing(k)) for k in cols)
+            len_column = len(pprint_thing('Column'))
+            space = max(space, len_column) + 4  # widest name/header + gap
+            space_num = len(pprint_thing(cols_count))
+            len_id = len(pprint_thing(' #.'))
+            space_num = max(space_num, len_id) + 2  # index column, likewise
             counts = None
 
-            tmpl = "{count}{dtype}"
+            header = _put_str(' #.', space_num) + _put_str('Column', space)
             if show_counts:
                 counts = self.count()
                 if len(cols) != len(counts):  # pragma: no cover
                     raise AssertionError(
-                        "Columns must equal counts "
-                        "({cols:d} != {counts:d})".format(
-                            cols=len(cols), counts=len(counts)
-                        )
-                    )
-                tmpl = "{count} non-null {dtype}"
-
+                        'Columns must equal counts ({cols_count} != {count})'
+                        .format(cols_count=cols_count, count=len(counts)))
+                col_header = 'Non-Null Count & Dtype'
+                tmpl = '{count} non-null {dtype}'
+            else:
+                col_header = 'Dtype'
+                tmpl = '{count}{dtype}'
+            header += col_header
+
+            lines.append(header)
+            lines.append(_put_str('-' * len_id, space_num) +
+                         _put_str('-' * len_column, space) +
+                         '-' * len(pprint_thing(col_header)))
             dtypes = self.dtypes
-            for i, col in enumerate(self.columns):
+            for i, col in enumerate(cols):
                 dtype = dtypes.iloc[i]
                 col = pprint_thing(col)
 
+                line_no = _put_str(' {num}'.format(num=i), space_num)
                 count = ""
                 if show_counts:
                     count = counts.iloc[i]
 
-                lines.append(
-                    _put_str(col, space) + tmpl.format(count=count, dtype=dtype)
-                )
+                lines.append(line_no + _put_str(col, space) +
+                             tmpl.format(count=count, dtype=dtype))
 
         def _non_verbose_repr():
             lines.append(self.columns._summary(name="Columns"))

diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
@@ -219,7 +219,9 @@ def test_info_memory(self):
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 2 entries, 0 to 1
         Data columns (total 1 columns):
-        a    2 non-null int64
+         #.  Column    Non-Null Count & Dtype
+        ---  ------    ----------------------
+         0   a         2 non-null int64
         dtypes: int64(1)
         memory usage: {} bytes
         """.format(
@@ -229,6 +231,26 @@ def test_info_memory(self):
 
         assert result == expected
 
+    def test_info_without_null_counts(self):
+        df = pd.DataFrame({'a': [1, 2]})
+        buf = StringIO()
+        df.info(buf=buf, null_counts=False)  # counts off: dtype-only table
+        buf.seek(0)  # rewind before reading back what info() wrote
+        lines = buf.readlines()
+        result = ''.join(lines[:-1])  # compare everything but the last line
+        expected = textwrap.dedent('''\
+        <class 'pandas.core.frame.DataFrame'>
+        RangeIndex: 2 entries, 0 to 1
+        Data columns (total 1 columns):
+         #.  Column    Dtype
+        ---  ------    -----
+         0   a         int64
+        dtypes: int64(1)
+        ''')
+        assert result == expected
+
+
+
     def test_info_wide(self):
         from pandas import set_option, reset_option
 
@@ -263,8 +285,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
         frame.info(buf=io)
         io.seek(0)
         lines = io.readlines()
-        assert "a    1 non-null int64\n" == lines[3]
-        assert "a    1 non-null float64\n" == lines[4]
+        assert ' 0   a         1 non-null int64\n' == lines[5]
+        assert ' 1   a         1 non-null float64\n' == lines[6]
 
     def test_info_shows_column_dtypes(self):
         dtypes = [
@@ -285,20 +307,20 @@ def test_info_shows_column_dtypes(self):
         df.info(buf=buf)
         res = buf.getvalue()
         for i, dtype in enumerate(dtypes):
-            name = "{i:d}    {n:d} non-null {dtype}".format(i=i, n=n, dtype=dtype)
+            name = '%s         %d non-null %s' % (i, n, dtype)
             assert name in res
 
     def test_info_max_cols(self):
         df = DataFrame(np.random.randn(10, 5))
-        for len_, verbose in [(5, None), (5, False), (10, True)]:
+        for len_, verbose in [(5, None), (5, False), (12, True)]:
             # For verbose always      ^ setting  ^ summarize ^ full output
             with option_context("max_info_columns", 4):
                 buf = StringIO()
                 df.info(buf=buf, verbose=verbose)
                 res = buf.getvalue()
                 assert len(res.strip().split("\n")) == len_
 
-        for len_, verbose in [(10, None), (5, False), (10, True)]:
+        for len_, verbose in [(10, None), (5, False), (12, True)]:
 
             # max_cols no exceeded
             with option_context("max_info_columns", 5):
@@ -307,7 +329,7 @@ def test_info_max_cols(self):
                 res = buf.getvalue()
                 assert len(res.strip().split("\n")) == len_
 
-        for len_, max_cols in [(10, 5), (5, 4)]:
+        for len_, max_cols in [(12, 5), (5, 4)]:
             # setting truncates
             with option_context("max_info_columns", 4):
                 buf = StringIO()