From 1a86a5034814c7290681dc991bfe64314daeaa59 Mon Sep 17 00:00:00 2001
From: gcerri <guglielmo.cerri@cogentech.it>
Date: Thu, 8 Aug 2024 16:00:16 +0200
Subject: [PATCH 1/8] Add option to DataFrame.info for structured output

---
 pandas/core/frame.py      |  6 +++++-
 pandas/io/formats/info.py | 41 +++++++++++++++++++++++++++++++--------
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ea91046f4b8e4..9d7321138bd23 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3529,17 +3529,21 @@ def info(
         max_cols: int | None = None,
         memory_usage: bool | str | None = None,
         show_counts: bool | None = None,
+        return_dict: bool | None = None,
     ) -> None:
         info = DataFrameInfo(
             data=self,
             memory_usage=memory_usage,
         )
-        info.render(
+        info_return = info.render(
             buf=buf,
             max_cols=max_cols,
             verbose=verbose,
             show_counts=show_counts,
+            return_dict=return_dict,
         )
+        if return_dict:
+            return info_return
 
     def memory_usage(self, index: bool = True, deep: bool = False) -> Series:
         """
diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index 469dcfb76ba0b..706f8f08fd265 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -494,7 +494,28 @@ def non_null_counts(self) -> Series:
     def memory_usage_bytes(self) -> int:
         deep = self.memory_usage == "deep"
         return self.data.memory_usage(index=True, deep=deep).sum()
-
+    
+    def to_dict(self) -> dict:
+        """Return DataFrame info as a dictionary."""
+        return {
+            'Column summary': self._get_column_summary(),
+            'Memory usage': self.memory_usage_bytes,
+            'Index type': type(self.data.index).__name__,
+            'Index entries': len(self.data.index),
+        }
+
+    def _get_column_summary(self) -> list[dict]:
+        """Return a DataFrame summarizing columns."""
+        return [
+            {
+                '#': i,
+                'Column': col,
+                'Non-Null Count': self.data[col].notna().sum(),
+                'Dtype': self.data[col].dtype
+            }
+            for i, col in enumerate(self.ids)
+        ]
+    
     def render(
         self,
         *,
@@ -502,14 +523,18 @@ def render(
         max_cols: int | None,
         verbose: bool | None,
         show_counts: bool | None,
+        return_dict: bool | None,
     ) -> None:
-        printer = _DataFrameInfoPrinter(
-            info=self,
-            max_cols=max_cols,
-            verbose=verbose,
-            show_counts=show_counts,
-        )
-        printer.to_buffer(buf)
+        if return_dict:
+            return self.to_dict()
+        else:
+            printer = _DataFrameInfoPrinter(
+                info=self,
+                max_cols=max_cols,
+                verbose=verbose,
+                show_counts=show_counts,
+            )
+            printer.to_buffer(buf)
 
 
 class SeriesInfo(_BaseInfo):

From 0abc4f4cef756bff62bf0f793912a29832ead70b Mon Sep 17 00:00:00 2001
From: gcerri <guglielmo.cerri@cogentech.it>
Date: Fri, 9 Aug 2024 11:51:44 +0200
Subject: [PATCH 2/8] Replace spaces with underscores/dashes in the keys of the df.info() dictionary

---
 pandas/io/formats/info.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index 706f8f08fd265..c7642f0964ab6 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -498,10 +498,10 @@ def memory_usage_bytes(self) -> int:
     def to_dict(self) -> dict:
         """Return DataFrame info as a dictionary."""
         return {
-            'Column summary': self._get_column_summary(),
-            'Memory usage': self.memory_usage_bytes,
-            'Index type': type(self.data.index).__name__,
-            'Index entries': len(self.data.index),
+            'Column_summary': self._get_column_summary(),
+            'Memory_usage': self.memory_usage_bytes,
+            'Index_type': type(self.data.index).__name__,
+            'Index_entries': len(self.data.index),
         }
 
     def _get_column_summary(self) -> list[dict]:
@@ -510,7 +510,7 @@ def _get_column_summary(self) -> list[dict]:
             {
                 '#': i,
                 'Column': col,
-                'Non-Null Count': self.data[col].notna().sum(),
+                'Non-Null-Count': self.data[col].notna().sum(),
                 'Dtype': self.data[col].dtype
             }
             for i, col in enumerate(self.ids)

From dd48b41ac1363ead8bb389155031fa9552698b1f Mon Sep 17 00:00:00 2001
From: gcerri <guglielmo.cerri@cogentech.it>
Date: Fri, 9 Aug 2024 11:52:51 +0200
Subject: [PATCH 3/8] Add unit tests for info() with return_dict=True

---
 pandas/tests/frame/methods/test_info.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py
index a4319f8a8ae7f..4771f435cc8fa 100644
--- a/pandas/tests/frame/methods/test_info.py
+++ b/pandas/tests/frame/methods/test_info.py
@@ -569,3 +569,28 @@ def test_info_show_counts(row, columns, show_counts, result):
         with StringIO() as buf:
             df.info(buf=buf, show_counts=show_counts)
             assert ("non-null" in buf.getvalue()) is result
+
+@pytest.mark.parametrize(
+    "df", [
+        DataFrame({
+            'A': [1, 2, 3],
+            'B': [4, 5, 6]
+        }),
+        DataFrame({}),
+    ]
+)
+def test_info_return_dict(df):
+    result = df.info(return_dict=True)
+    expected_keys = {'Column_summary', 'Memory_usage', 'Index_type', 'Index_entries'}
+    assert isinstance(result, dict)
+    assert expected_keys.issubset(result.keys())
+
+    assert 'Column_summary' in result
+    assert 'Memory_usage' in result
+    assert 'Index_type' in result
+    assert 'Index_entries' in result
+    
+    assert isinstance(result['Column_summary'], list)
+    assert isinstance(result['Memory_usage'], np.int64)
+    assert isinstance(result['Index_type'], str)
+    assert isinstance(result['Index_entries'], int)
\ No newline at end of file

From 3453f874eb816473241436afa4a55e3a23fc4a48 Mon Sep 17 00:00:00 2001
From: gcerri <guglielmo.cerri@cogentech.it>
Date: Fri, 9 Aug 2024 12:28:45 +0200
Subject: [PATCH 4/8] Added return_dict of DataFrame.info into the doc

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 32c98fbf9d655..f960a9f03476b 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -48,6 +48,7 @@ Other enhancements
 - :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`)
 - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
 - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
+- :meth:`DataFrame.info` now has a ``return_dict`` parameter to return the summary as a dictionary instead of printing it (:issue:`59387`)
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)

From 35a5842c2f838c899fbf473bbd0a925dc745ae75 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 9 Aug 2024 12:25:57 +0000
Subject: [PATCH 5/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pandas/io/formats/info.py               | 20 ++++++++--------
 pandas/tests/frame/methods/test_info.py | 31 ++++++++++++-------------
 2 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index c7642f0964ab6..cf6134f77e7b6 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -494,28 +494,28 @@ def non_null_counts(self) -> Series:
     def memory_usage_bytes(self) -> int:
         deep = self.memory_usage == "deep"
         return self.data.memory_usage(index=True, deep=deep).sum()
-    
+
     def to_dict(self) -> dict:
         """Return DataFrame info as a dictionary."""
         return {
-            'Column_summary': self._get_column_summary(),
-            'Memory_usage': self.memory_usage_bytes,
-            'Index_type': type(self.data.index).__name__,
-            'Index_entries': len(self.data.index),
+            "Column_summary": self._get_column_summary(),
+            "Memory_usage": self.memory_usage_bytes,
+            "Index_type": type(self.data.index).__name__,
+            "Index_entries": len(self.data.index),
         }
 
     def _get_column_summary(self) -> list[dict]:
         """Return a DataFrame summarizing columns."""
         return [
             {
-                '#': i,
-                'Column': col,
-                'Non-Null-Count': self.data[col].notna().sum(),
-                'Dtype': self.data[col].dtype
+                "#": i,
+                "Column": col,
+                "Non-Null-Count": self.data[col].notna().sum(),
+                "Dtype": self.data[col].dtype,
             }
             for i, col in enumerate(self.ids)
         ]
-    
+
     def render(
         self,
         *,
diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py
index 4771f435cc8fa..bd7aa0e8f1f06 100644
--- a/pandas/tests/frame/methods/test_info.py
+++ b/pandas/tests/frame/methods/test_info.py
@@ -570,27 +570,26 @@ def test_info_show_counts(row, columns, show_counts, result):
             df.info(buf=buf, show_counts=show_counts)
             assert ("non-null" in buf.getvalue()) is result
 
+
 @pytest.mark.parametrize(
-    "df", [
-        DataFrame({
-            'A': [1, 2, 3],
-            'B': [4, 5, 6]
-        }),
+    "df",
+    [
+        DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
         DataFrame({}),
-    ]
+    ],
 )
 def test_info_return_dict(df):
     result = df.info(return_dict=True)
-    expected_keys = {'Column_summary', 'Memory_usage', 'Index_type', 'Index_entries'}
+    expected_keys = {"Column_summary", "Memory_usage", "Index_type", "Index_entries"}
     assert isinstance(result, dict)
     assert expected_keys.issubset(result.keys())
 
-    assert 'Column_summary' in result
-    assert 'Memory_usage' in result
-    assert 'Index_type' in result
-    assert 'Index_entries' in result
-    
-    assert isinstance(result['Column_summary'], list)
-    assert isinstance(result['Memory_usage'], np.int64)
-    assert isinstance(result['Index_type'], str)
-    assert isinstance(result['Index_entries'], int)
\ No newline at end of file
+    assert "Column_summary" in result
+    assert "Memory_usage" in result
+    assert "Index_type" in result
+    assert "Index_entries" in result
+
+    assert isinstance(result["Column_summary"], list)
+    assert isinstance(result["Memory_usage"], np.int64)
+    assert isinstance(result["Index_type"], str)
+    assert isinstance(result["Index_entries"], int)

From 992d57963a0f9eed43fe3787cb0f94d78284d583 Mon Sep 17 00:00:00 2001
From: gcerri <guglielmo.cerri@cogentech.it>
Date: Fri, 9 Aug 2024 15:07:10 +0200
Subject: [PATCH 6/8] Adding doc and fix typing

---
 pandas/io/formats/info.py | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index cf6134f77e7b6..092fb26f6ee48 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -54,6 +54,15 @@
 )
 
 
+return_dict_sub = dedent(
+    """\
+    return_dict : bool, optional
+        Whether to return the summary as a dictionary. If True, nothing is
+        printed and a dictionary describing the object is returned. If False
+        or None (the default), the summary is printed and None is returned."""
+)
+
+
 frame_examples_sub = dedent(
     """\
     >>> int_values = [1, 2, 3, 4, 5]
@@ -136,7 +145,11 @@
      1   column_2  1000000 non-null  object
      2   column_3  1000000 non-null  object
     dtypes: object(3)
-    memory usage: 165.9 MB"""
+    memory usage: 165.9 MB
+    
+    >>> info_dict = df.info(return_dict=True)
+    >>> print(info_dict)
+    {'Column_summary': '...', 'Memory_usage': 24000128, 'Index_type': 'RangeIndex', 'Index_entries': 1000000}"""
 )
 
 
@@ -153,6 +166,7 @@
     "type_sub": " and columns",
     "max_cols_sub": frame_max_cols_sub,
     "show_counts_sub": show_counts_sub,
+    "return_dict_sub": return_dict_sub,
     "examples_sub": frame_examples_sub,
     "see_also_sub": frame_see_also_sub,
     "version_added_sub": "",
@@ -233,6 +247,7 @@
     "type_sub": "",
     "max_cols_sub": "",
     "show_counts_sub": show_counts_sub,
+    "return_dict_sub": return_dict_sub,
     "examples_sub": series_examples_sub,
     "see_also_sub": series_see_also_sub,
     "version_added_sub": "\n.. versionadded:: 1.4.0\n",
@@ -273,11 +288,13 @@
         :ref:`Frequently Asked Questions <df-memory-usage>` for more
         details.
     {show_counts_sub}
-
+    {return_dict_sub}
+    
     Returns
     -------
-    None
-        This method prints a summary of a {klass} and returns None.
+    dict or None
+        If return_dict is True, returns a dictionary summarizing the {klass}.
+        Otherwise, returns None.
 
     See Also
     --------
@@ -435,7 +452,7 @@ def render(
         max_cols: int | None,
         verbose: bool | None,
         show_counts: bool | None,
-    ) -> None:
+    ) -> None | dict:
         pass
 
 
@@ -524,7 +541,7 @@ def render(
         verbose: bool | None,
         show_counts: bool | None,
         return_dict: bool | None,
-    ) -> None:
+    ) -> None | dict:
         if return_dict:
             return self.to_dict()
         else:

From f388827d5a9d04f3b0746f8f91b9bc8027ef42bb Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 9 Aug 2024 13:12:46 +0000
Subject: [PATCH 7/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pandas/io/formats/info.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index 092fb26f6ee48..8c8cc145194ef 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -146,7 +146,7 @@
      2   column_3  1000000 non-null  object
     dtypes: object(3)
     memory usage: 165.9 MB
-    
+
     >>> info_dict = df.info(return_dict=True)
     >>> print(info_dict)
     {'Column_summary': '...', 'Memory_usage': 24000128, 'Index_type': 'RangeIndex', 'Index_entries': 1000000}"""
@@ -289,7 +289,7 @@
         details.
     {show_counts_sub}
     {return_dict_sub}
-    
+
     Returns
     -------
     dict or None

From 765d1da507a6507ea30086f1d7ea0b9ed1492b85 Mon Sep 17 00:00:00 2001
From: gcerri <guglielmo.cerri@cogentech.it>
Date: Fri, 9 Aug 2024 15:23:30 +0200
Subject: [PATCH 8/8] Fix line length issue in doctest example

---
 pandas/io/formats/info.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
index 8c8cc145194ef..8629eb0ff6368 100644
--- a/pandas/io/formats/info.py
+++ b/pandas/io/formats/info.py
@@ -149,7 +149,8 @@
 
     >>> info_dict = df.info(return_dict=True)
     >>> print(info_dict)
-    {'Column_summary': '...', 'Memory_usage': 24000128, 'Index_type': 'RangeIndex', 'Index_entries': 1000000}"""
+    {'Column_summary': '...', 'Memory_usage': 24000128,
+    'Index_type': 'RangeIndex', 'Index_entries': 1000000}"""
 )