From b45f14afd179914f4433212f358ff376f59b7db7 Mon Sep 17 00:00:00 2001
From: David Pires <david.c.pires@tecnico.ulisboa.pt>
Date: Sat, 8 Jun 2024 16:03:11 +0000
Subject: [PATCH] ENH: Write excel comments, via styler.to_excel() tooltips
 (#58070)

Co-Authored-By: diogomsmiranda <diogomsmiranda@tecnico.ulisboa.pt>
---
 doc/source/whatsnew/v3.0.0.rst        |   1 +
 pandas/core/generic.py                |   9 +-
 pandas/io/excel/_base.py              |   3 +
 pandas/io/excel/_odswriter.py         |  11 ++
 pandas/io/excel/_openpyxl.py          |  23 ++++
 pandas/io/excel/_xlsxwriter.py        |  20 +++
 pandas/io/formats/excel.py            |   4 +
 pandas/io/formats/style.py            |  14 +++
 pandas/tests/io/excel/test_writers.py | 170 ++++++++++++++++++++++++++
 9 files changed, 252 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 639655ab28199..df37e7d1c5d47 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -40,6 +40,7 @@ Other enhancements
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
 - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`)
+- :meth:`Styler.to_excel` now supports writing notes to an Excel file via :meth:`Styler.set_tooltips` (:issue:`58070`)
 - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
 - :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index fc9821a65777d..fd4d153874da0 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2126,10 +2126,11 @@ def _repr_data_resource_(self):
         storage_options_versionadded="1.2.0",
         extra_parameters=textwrap.dedent(
             """\
-        engine_kwargs : dict, optional
-            Arbitrary keyword arguments passed to excel engine.
-    """
+            engine_kwargs : dict, optional
+                Arbitrary keyword arguments passed to excel engine.
+            """
         ),
+        extra_examples="",
     )
     def to_excel(
         self,
@@ -2261,6 +2262,8 @@ def to_excel(
         automatically chosen depending on the file extension):
 
         >>> df1.to_excel("output1.xlsx", engine="xlsxwriter")  # doctest: +SKIP
+        {extra_examples}
+        End of examples.
         """
         if engine_kwargs is None:
             engine_kwargs = {}
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index f83f9cb1c8d74..4ddd46dbea0e5 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -1212,6 +1212,7 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        notes: DataFrame | None = None,
     ) -> None:
         """
         Write given formatted cells into Excel an excel sheet
@@ -1220,6 +1221,8 @@ def _write_cells(
         ----------
         cells : generator
             cell of formatted data to save to Excel sheet
+        notes : DataFrame, default None
+            DataFrame containing notes to be written to the Excel sheet
         sheet_name : str, default None
             Name of Excel sheet, if None, then use self.cur_sheet
         startrow : upper left cell row to dump data frame
diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py
index 0ddb59d3413ff..c89a17b5e2431 100644
--- a/pandas/io/excel/_odswriter.py
+++ b/pandas/io/excel/_odswriter.py
@@ -27,6 +27,8 @@
         WriteExcelBuffer,
     )
 
+    from pandas.core.frame import DataFrame
+
     from pandas.io.formats.excel import ExcelCell
 
 
@@ -99,6 +101,7 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        notes: DataFrame | None = None,
     ) -> None:
         """
         Write the frame cells using odf
@@ -110,6 +113,14 @@ def _write_cells(
         )
         from odf.text import P
 
+        if notes is not None:
+            raise NotImplementedError(
+                """
+                Notes are not supported by the odswriter engine,
+                see https://github.com/eea/odfpy
+                """
+            )
+
         sheet_name = self._get_sheet_name(sheet_name)
         assert sheet_name is not None
 
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 218a592c22b4a..5e8ff2826c6f1 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -37,6 +37,8 @@
         WriteExcelBuffer,
     )
 
+    from pandas.core.frame import DataFrame
+
 
 class OpenpyxlWriter(ExcelWriter):
     _engine = "openpyxl"
@@ -447,7 +449,10 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        notes: DataFrame | None = None,
     ) -> None:
+        from openpyxl.comments import Comment
+
         # Write the frame cells using openpyxl.
         sheet_name = self._get_sheet_name(sheet_name)
 
@@ -484,6 +489,10 @@ def _write_cells(
                 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
             )
 
+        notes_col = None
+        if notes is not None and cells is not None:
+            notes_col = startcol + next(cells).col + 1
+
         for cell in cells:
             xcell = wks.cell(
                 row=startrow + cell.row + 1, column=startcol + cell.col + 1
@@ -530,6 +539,20 @@ def _write_cells(
                             for k, v in style_kwargs.items():
                                 setattr(xcell, k, v)
 
+        if notes is None or notes_col is None:
+            return
+
+        for row_idx, val in enumerate(notes.itertuples(index=False)):
+            for col_idx, note in enumerate(val):
+                xcell = wks.cell(
+                    # first row has columns and openpyxl starts counting at 1, not 0
+                    row=row_idx + 2,
+                    column=col_idx + notes_col,  # n columns with indexes
+                )
+                if note:
+                    comment = Comment(str(note), "")
+                    xcell.comment = comment
+
 
 class OpenpyxlReader(BaseExcelReader["Workbook"]):
     @doc(storage_options=_shared_docs["storage_options"])
diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py
index b2fd24a670300..eeb318eadda5c 100644
--- a/pandas/io/excel/_xlsxwriter.py
+++ b/pandas/io/excel/_xlsxwriter.py
@@ -20,6 +20,8 @@
         WriteExcelBuffer,
     )
 
+    from pandas.core.frame import DataFrame
+
 
 class _XlsxStyler:
     # Map from openpyxl-oriented styles to flatter xlsxwriter representation
@@ -245,6 +247,7 @@ def _write_cells(
         startrow: int = 0,
         startcol: int = 0,
         freeze_panes: tuple[int, int] | None = None,
+        notes: DataFrame | None = None,
     ) -> None:
         # Write the frame cells using xlsxwriter.
         sheet_name = self._get_sheet_name(sheet_name)
@@ -258,6 +261,10 @@ def _write_cells(
         if validate_freeze_panes(freeze_panes):
             wks.freeze_panes(*(freeze_panes))
 
+        notes_col = None
+        if notes is not None and cells is not None:
+            notes_col = startcol + next(cells).col
+
         for cell in cells:
             val, fmt = self._value_with_fmt(cell.val)
 
@@ -282,3 +289,16 @@ def _write_cells(
                 )
             else:
                 wks.write(startrow + cell.row, startcol + cell.col, val, style)
+
+        if notes is None or notes_col is None:
+            return
+
+        for row_idx, row in enumerate(notes.itertuples(index=False)):
+            for col_idx, note in enumerate(row):
+                if note == "":
+                    continue
+                wks.write_comment(
+                    row_idx + 1,  # first row has columns
+                    col_idx + notes_col,  # n columns with indexes
+                    str(note),
+                )
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
index 52b5755558900..894e506b40961 100644
--- a/pandas/io/formats/excel.py
+++ b/pandas/io/formats/excel.py
@@ -557,7 +557,10 @@ def __init__(
     ) -> None:
         self.rowcounter = 0
         self.na_rep = na_rep
+        self.notes = None
         if not isinstance(df, DataFrame):
+            if df.tooltips is not None:
+                self.notes = df.tooltips.tt_data
             self.styler = df
             self.styler._compute()  # calculate applied styles
             df = df.data
@@ -954,6 +957,7 @@ def write(
                 startrow=startrow,
                 startcol=startcol,
                 freeze_panes=freeze_panes,
+                notes=self.notes,
             )
         finally:
             # make sure to close opened file handles
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 6f4c2fa6c6eae..7cec2e0af1d28 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -83,6 +83,7 @@
 
     from pandas import ExcelWriter
 
+import textwrap
 
 ####
 # Shared Doc Strings
@@ -538,6 +539,19 @@ def set_tooltips(
         storage_options=_shared_docs["storage_options"],
         storage_options_versionadded="1.5.0",
         extra_parameters="",
+        extra_examples=textwrap.dedent(
+            """\
+            If you wish to write Excel notes to the workbook, you can do so by
+            passing a DataFrame to ``set_tooltips``. This process is independent
+            of writing data to the workbook, so the two DataFrames can have
+            different dimensions.
+
+            >>> notes = pd.DataFrame(
+            ...     [["cell 1", "cell 2"], ["cell 3", "cell 4"]],
+            ... )  # doctest: +SKIP
+            >>> df1.style.set_tooltips(notes).to_excel("output.xlsx")  # doctest: +SKIP
+            """
+        ),
     )
     def to_excel(
         self,
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
index 482b331332462..cea2604af747f 100644
--- a/pandas/tests/io/excel/test_writers.py
+++ b/pandas/tests/io/excel/test_writers.py
@@ -41,6 +41,36 @@ def get_exp_unit(path: str) -> str:
     return "us"
 
 
+def read_notes(tmp_excel) -> DataFrame:
+    """
+    Read the cell comments of "Sheet1" in ``tmp_excel`` into a DataFrame.
+
+    A cell without a comment contributes "". Leading and trailing rows and
+    columns that hold no comment at all are dropped before the frame is built.
+    """
+    from openpyxl import load_workbook
+
+    sheet = load_workbook(tmp_excel)["Sheet1"]
+    grid = [
+        [cell.comment.content if cell.comment else "" for cell in row]
+        for row in sheet.rows
+    ]
+
+    # peel off blank outer rows first, then blank outer columns
+    while grid and all(text == "" for text in grid[0]):
+        del grid[0]
+    while grid and all(text == "" for text in grid[-1]):
+        del grid[-1]
+    while grid and all(row[0] == "" for row in grid):
+        for row in grid:
+            del row[0]
+    while grid and all(row[-1] == "" for row in grid):
+        for row in grid:
+            del row[-1]
+
+    return DataFrame(grid)
+
+
 @pytest.fixture
 def frame(float_frame):
     """
@@ -333,6 +363,146 @@ def test_multiindex_interval_datetimes(self, tmp_excel):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_write_with_notes(self, tmp_excel, ext):
+        """
+        Tooltips passed to ``Styler.set_tooltips`` are read back as cell notes.
+
+        Nothing is checked unless ``ext`` is "xlsm" or "xlsx".
+        """
+        if ext not in ["xlsm", "xlsx"]:
+            return
+        # one note per data cell; the same frame is the expected read-back
+        expected = DataFrame(
+            [
+                ["note 1", "note 2", "note 3"],
+                ["note 1", "note 2", "note 3"],
+                ["note 1", "note 2", "note 3"],
+            ]
+        )
+        df = DataFrame([[1, 100, 200], [2, 200, 300], [3, 300, 400]])
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        tm.assert_frame_equal(read_notes(tmp_excel), expected)
+
+    def test_read_write_with_notes_trim_rows(self, tmp_excel, ext):
+        """
+        Blank first and last note rows are not returned by ``read_notes``.
+
+        Nothing is checked unless ``ext`` is "xlsm" or "xlsx".
+        """
+        if ext in ["xlsm", "xlsx"]:
+            notes = DataFrame(
+                [
+                    ["", "", ""],
+                    ["note 1", "note 2", "note3"],
+                    ["", "", ""],
+                ]
+            )
+            # read_notes trims the blank outer rows, leaving the middle one
+            expected = DataFrame([["note 1", "note 2", "note3"]])
+            df = DataFrame([[1, 100, 200], [2, 200, 300], [3, 300, 400]])
+            df.style.set_tooltips(notes).to_excel(tmp_excel)
+            tm.assert_frame_equal(read_notes(tmp_excel), expected)
+
+    def test_read_write_with_notes_trim_columns(self, tmp_excel, ext):
+        """
+        Blank first and last note columns are not returned by ``read_notes``.
+
+        Nothing is checked unless ``ext`` is "xlsm" or "xlsx".
+        """
+        if ext in ["xlsm", "xlsx"]:
+            notes = DataFrame(
+                [
+                    ["", "note 2", ""],
+                    ["", "note 2", ""],
+                    ["", "note 2", ""],
+                ]
+            )
+            # read_notes trims the blank outer columns, leaving the middle one
+            expected = DataFrame([["note 2"], ["note 2"], ["note 2"]])
+            df = DataFrame([[1, 100, 200], [2, 200, 300], [3, 300, 400]])
+            df.style.set_tooltips(notes).to_excel(tmp_excel)
+            tm.assert_frame_equal(read_notes(tmp_excel), expected)
+
+    def test_read_write_with_notes_trim_rows_and_columns(self, tmp_excel, ext):
+        """
+        Blank outer note rows and columns are not returned by ``read_notes``.
+
+        Nothing is checked unless ``ext`` is "xlsm" or "xlsx".
+        """
+        if ext in ["xlsm", "xlsx"]:
+            notes = DataFrame(
+                [
+                    ["", "", ""],
+                    ["", "note 2", ""],
+                    ["", "", ""],
+                ]
+            )
+            # read_notes trims every blank outer row and column: one cell is left
+            expected = DataFrame([["note 2"]])
+            df = DataFrame([[1, 100, 200], [2, 200, 300], [3, 300, 400]])
+            df.style.set_tooltips(notes).to_excel(tmp_excel)
+            tm.assert_frame_equal(read_notes(tmp_excel), expected)
+
+    def test_read_write_with_notes_empty_comments_no_trim(self, tmp_excel, ext):
+        """
+        Blank notes are read back as "" when no outer row or column is blank.
+
+        Nothing is checked unless ``ext`` is "xlsm" or "xlsx".
+        """
+        if ext not in ["xlsm", "xlsx"]:
+            return
+        # the two corner notes keep every outer row and column non-blank
+        expected = DataFrame(
+            [
+                ["note 1", "", ""],
+                ["", "", ""],
+                ["", "", "note 3"],
+            ]
+        )
+        df = DataFrame([[1, 100, 200], [2, 200, 300], [3, 300, 400]])
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        tm.assert_frame_equal(read_notes(tmp_excel), expected)
+
+    def test_read_write_with_notes_smaller_dimensions(self, tmp_excel, ext):
+        """
+        A notes frame smaller than the data frame is written and read back intact.
+
+        Nothing is checked unless ``ext`` is "xlsm" or "xlsx".
+        """
+        if ext not in ["xlsm", "xlsx"]:
+            return
+        # 2x2 notes against 3x3 data
+        expected = DataFrame(
+            [
+                ["note 1", "note 2"],
+                ["note 1", "note 2"],
+            ]
+        )
+        df = DataFrame([[1, 100, 200], [2, 200, 300], [3, 300, 400]])
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        tm.assert_frame_equal(read_notes(tmp_excel), expected)
+
+    def test_read_write_with_notes_bigger_dimensions(self, tmp_excel, ext):
+        """
+        A notes frame larger than the data frame is written and read back intact.
+
+        Nothing is checked unless ``ext`` is "xlsm" or "xlsx".
+        """
+        if ext not in ["xlsm", "xlsx"]:
+            return
+        # 4x4 notes against 3x3 data
+        expected = DataFrame(
+            [
+                ["note 1", "note 2", "note 3", "note 4"],
+                ["note 1", "note 2", "note 3", "note 4"],
+                ["note 1", "note 2", "note 3", "note 4"],
+                ["note 1", "note 2", "note 3", "note 4"],
+            ]
+        )
+        df = DataFrame([[1, 100, 200], [2, 200, 300], [3, 300, 400]])
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        tm.assert_frame_equal(read_notes(tmp_excel), expected)
+
 
 @pytest.mark.parametrize(
     "engine,ext",