pandas-dev · jreback · Jan 3, 2020 · Oct 8, 2019 · Oct 9, 2019 · Oct 9, 2019
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -50,6 +50,8 @@ including other versions of pandas.
 Enhancements
 ~~~~~~~~~~~~
 
+- :meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`)
+
 .. _whatsnew_100.string:
 
 Dedicated string data type

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2343,9 +2343,11 @@ def info(
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 5 entries, 0 to 4
         Data columns (total 3 columns):
-        int_col      5 non-null int64
-        text_col     5 non-null object
-        float_col    5 non-null float64
+         #   Column     Non-Null Count  Dtype
+        ---  ------     --------------  -----
+         0   int_col    5 non-null      int64
+         1   text_col   5 non-null      object
+         2   float_col  5 non-null      float64
         dtypes: float64(1), int64(1), object(1)
         memory usage: 248.0+ bytes
 
@@ -2384,19 +2386,23 @@ def info(
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 1000000 entries, 0 to 999999
         Data columns (total 3 columns):
-        column_1    1000000 non-null object
-        column_2    1000000 non-null object
-        column_3    1000000 non-null object
+         #   Column    Non-Null Count    Dtype
+        ---  ------    --------------    -----
+         0   column_1  1000000 non-null  object
+         1   column_2  1000000 non-null  object
+         2   column_3  1000000 non-null  object
         dtypes: object(3)
         memory usage: 22.9+ MB
 
         >>> df.info(memory_usage='deep')
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 1000000 entries, 0 to 999999
         Data columns (total 3 columns):
-        column_1    1000000 non-null object
-        column_2    1000000 non-null object
-        column_3    1000000 non-null object
+         #   Column    Non-Null Count    Dtype
+        ---  ------    --------------    -----
+         0   column_1  1000000 non-null  object
+         1   column_2  1000000 non-null  object
+         2   column_3  1000000 non-null  object
         dtypes: object(3)
         memory usage: 188.8 MB
         """
@@ -2415,6 +2421,7 @@ def info(
             return
 
         cols = self.columns
+        col_count = len(self.columns)
 
         # hack
         if max_cols is None:
@@ -2423,17 +2430,28 @@ def info(
         max_rows = get_option("display.max_info_rows", len(self) + 1)
 
         if null_counts is None:
-            show_counts = (len(self.columns) <= max_cols) and (len(self) < max_rows)
+            show_counts = (col_count <= max_cols) and (len(self) < max_rows)
         else:
             show_counts = null_counts
-        exceeds_info_cols = len(self.columns) > max_cols
+        exceeds_info_cols = col_count > max_cols
 
         def _verbose_repr():
             lines.append("Data columns (total %d columns):" % len(self.columns))
-            space = max(len(pprint_thing(k)) for k in self.columns) + 4
+
+            id_head = " # "
+            column_head = "Column"
+            col_space = 2
+
+            max_col = max(len(pprint_thing(k)) for k in cols)
+            len_column = len(pprint_thing(column_head))
+            space = max(max_col, len_column) + col_space
+
+            max_id = len(pprint_thing(col_count))
+            len_id = len(pprint_thing(id_head))
+            space_num = max(max_id, len_id) + col_space
             counts = None
 
-            tmpl = "{count}{dtype}"
+            header = _put_str(id_head, space_num) + _put_str(column_head, space)
             if show_counts:
                 counts = self.count()
                 if len(cols) != len(counts):  # pragma: no cover
@@ -2443,19 +2461,48 @@ def _verbose_repr():
                             cols=len(cols), counts=len(counts)
                         )
                     )
-                tmpl = "{count} non-null {dtype}"
+                count_header = "Non-Null Count"
+                len_count = len(count_header)
+                non_null = " non-null"
+                max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null)
+                space_count = max(len_count, max_count) + col_space
+                count_temp = "{count}" + non_null
+            else:
+                count_header = ""
+                space_count = len(count_header)
+                len_count = space_count
+                count_temp = "{count}"
+
+            dtype_header = "Dtype"
+            len_dtype = len(dtype_header)
+            max_dtypes = max(len(pprint_thing(k)) for k in self.dtypes)
+            space_dtype = max(len_dtype, max_dtypes)
+            header += _put_str(count_header, space_count) + _put_str(
+                dtype_header, space_dtype
+            )
+
+            lines.append(header)
+            lines.append(
+                _put_str("-" * len_id, space_num)
+                + _put_str("-" * len_column, space)
+                + _put_str("-" * len_count, space_count)
+                + _put_str("-" * len_dtype, space_dtype)
+            )
 
-            dtypes = self.dtypes
             for i, col in enumerate(self.columns):
-                dtype = dtypes.iloc[i]
+                dtype = self.dtypes.iloc[i]
                 col = pprint_thing(col)
 
+                line_no = _put_str(" {num}".format(num=i), space_num)
                 count = ""
                 if show_counts:
                     count = counts.iloc[i]
 
                 lines.append(
-                    _put_str(col, space) + tmpl.format(count=count, dtype=dtype)
+                    line_no
+                    + _put_str(col, space)
+                    + _put_str(count_temp.format(count=count), space_count)
+                    + _put_str(dtype, space_dtype)
                 )
 
         def _non_verbose_repr():

diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
@@ -206,6 +206,28 @@ def test_info(self):
         frame.info()
         frame.info(verbose=False)
 
+    def test_info_verbose(self):
+        buf = StringIO()
+        size = 1001
+        start = 5
+        frame = DataFrame(np.random.randn(3, size))
+        frame.info(verbose=True, buf=buf)
+
+        res = buf.getvalue()
+        header = " #    Column  Dtype  \n" "---   ------  -----  "
+        assert header in res
+
+        frame.info(verbose=True, buf=buf)
+        buf.seek(0)
+        lines = buf.readlines()
+        assert len(lines) > 0
+
+        for i, line in enumerate(lines):
+            if i >= start and i < start + size:
+                index = i - start
+                line_nr = " {} ".format(index)
+                assert line.startswith(line_nr)
+
     def test_info_memory(self):
         # https://github.com/pandas-dev/pandas/issues/21056
         df = pd.DataFrame({"a": pd.Series([1, 2], dtype="i8")})
@@ -219,7 +241,9 @@ def test_info_memory(self):
         <class 'pandas.core.frame.DataFrame'>
         RangeIndex: 2 entries, 0 to 1
         Data columns (total 1 columns):
-        a    2 non-null int64
+         #   Column  Non-Null Count  Dtype
+        ---  ------  --------------  -----
+         0   a       2 non-null      int64
         dtypes: int64(1)
         memory usage: {} bytes
         """.format(
@@ -263,8 +287,8 @@ def test_info_duplicate_columns_shows_correct_dtypes(self):
         frame.info(buf=io)
         io.seek(0)
         lines = io.readlines()
-        assert "a    1 non-null int64\n" == lines[3]
-        assert "a    1 non-null float64\n" == lines[4]
+        assert " 0   a       1 non-null      int64  \n" == lines[5]
+        assert " 1   a       1 non-null      float64\n" == lines[6]
 
     def test_info_shows_column_dtypes(self):
         dtypes = [
@@ -284,30 +308,37 @@ def test_info_shows_column_dtypes(self):
         buf = StringIO()
         df.info(buf=buf)
         res = buf.getvalue()
+        header = (
+            " #   Column  Non-Null Count  Dtype          \n"
+            "---  ------  --------------  -----          "
+        )
+        assert header in res
         for i, dtype in enumerate(dtypes):
-            name = "{i:d}    {n:d} non-null {dtype}".format(i=i, n=n, dtype=dtype)
+            name = " {i:d}   {i:d}       {n:d} non-null     {dtype}".format(
+                i=i, n=n, dtype=dtype
+            )
             assert name in res
 
     def test_info_max_cols(self):
         df = DataFrame(np.random.randn(10, 5))
-        for len_, verbose in [(5, None), (5, False), (10, True)]:
+        for len_, verbose in [(5, None), (5, False), (12, True)]:
             # For verbose always      ^ setting  ^ summarize ^ full output
             with option_context("max_info_columns", 4):
                 buf = StringIO()
                 df.info(buf=buf, verbose=verbose)
                 res = buf.getvalue()
                 assert len(res.strip().split("\n")) == len_
 
-        for len_, verbose in [(10, None), (5, False), (10, True)]:
+        for len_, verbose in [(12, None), (5, False), (12, True)]:
 
-            # max_cols no exceeded
+            # max_cols not exceeded
             with option_context("max_info_columns", 5):
                 buf = StringIO()
                 df.info(buf=buf, verbose=verbose)
                 res = buf.getvalue()
                 assert len(res.strip().split("\n")) == len_
 
-        for len_, max_cols in [(10, 5), (5, 4)]:
+        for len_, max_cols in [(12, 5), (5, 4)]:
             # setting truncates
             with option_context("max_info_columns", 4):
                 buf = StringIO()