From b45f14afd179914f4433212f358ff376f59b7db7 Mon Sep 17 00:00:00 2001 From: David Pires Date: Sat, 8 Jun 2024 16:03:11 +0000 Subject: [PATCH] ENH: Write excel comments, via styler.to_excel() tooltips (#58070) Co-Authored-By: diogomsmiranda --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/generic.py | 9 +- pandas/io/excel/_base.py | 3 + pandas/io/excel/_odswriter.py | 11 ++ pandas/io/excel/_openpyxl.py | 23 ++++ pandas/io/excel/_xlsxwriter.py | 20 +++ pandas/io/formats/excel.py | 4 + pandas/io/formats/style.py | 14 +++ pandas/tests/io/excel/test_writers.py | 170 ++++++++++++++++++++++++++ 9 files changed, 252 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 639655ab28199..df37e7d1c5d47 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -40,6 +40,7 @@ Other enhancements - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) - :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header header cells (:issue:`35384`) +- :func:`DataFrame.to_excel` now supports writing notes to Excel files via :meth:`Styler.set_tooltips` (:issue:`58070`) - :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`) - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc9821a65777d..fd4d153874da0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ 
-2126,10 +2126,11 @@ def _repr_data_resource_(self): storage_options_versionadded="1.2.0", extra_parameters=textwrap.dedent( """\ - engine_kwargs : dict, optional - Arbitrary keyword arguments passed to excel engine. - """ + engine_kwargs : dict, optional + Arbitrary keyword arguments passed to excel engine. + """ ), + extra_examples="", ) def to_excel( self, @@ -2261,6 +2262,8 @@ def to_excel( automatically chosen depending on the file extension): >>> df1.to_excel("output1.xlsx", engine="xlsxwriter") # doctest: +SKIP + {extra_examples} + End of examples. """ if engine_kwargs is None: engine_kwargs = {} diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f83f9cb1c8d74..4ddd46dbea0e5 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1212,6 +1212,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + notes: DataFrame | None = None, ) -> None: """ Write given formatted cells into Excel an excel sheet @@ -1220,6 +1221,8 @@ def _write_cells( ---------- cells : generator cell of formatted data to save to Excel sheet + notes: DataFrame + DataFrame containing notes to be written to the Excel sheet sheet_name : str, default None Name of Excel sheet, if None, then use self.cur_sheet startrow : upper left cell row to dump data frame diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 0ddb59d3413ff..c89a17b5e2431 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -27,6 +27,8 @@ WriteExcelBuffer, ) + from pandas.core.frame import DataFrame + from pandas.io.formats.excel import ExcelCell @@ -99,6 +101,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + notes: DataFrame | None = None, ) -> None: """ Write the frame cells using odf @@ -110,6 +113,14 @@ def _write_cells( ) from odf.text import P + if notes is not None: + raise NotImplementedError( + """ + Notes are not 
supported by the odswriter engine, + see https://github.com/eea/odfpy + """ + ) + sheet_name = self._get_sheet_name(sheet_name) assert sheet_name is not None diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 218a592c22b4a..5e8ff2826c6f1 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -37,6 +37,8 @@ WriteExcelBuffer, ) + from pandas.core.frame import DataFrame + class OpenpyxlWriter(ExcelWriter): _engine = "openpyxl" @@ -447,7 +449,10 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + notes: DataFrame | None = None, ) -> None: + from openpyxl.comments import Comment + # Write the frame cells using openpyxl. sheet_name = self._get_sheet_name(sheet_name) @@ -484,6 +489,10 @@ def _write_cells( row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 ) + notes_col = None + if notes is not None and cells is not None: + notes_col = startcol + next(cells).col + 1 + for cell in cells: xcell = wks.cell( row=startrow + cell.row + 1, column=startcol + cell.col + 1 @@ -530,6 +539,20 @@ def _write_cells( for k, v in style_kwargs.items(): setattr(xcell, k, v) + if notes is None or notes_col is None: + return + + for row_idx, val in enumerate(notes.itertuples(index=False)): + for col_idx, note in enumerate(val): + xcell = wks.cell( + # first row has columns and openpyxl starts counting at 1, not 0 + row=row_idx + 2, + column=col_idx + notes_col, # n columns with indexes + ) + if note: + comment = Comment(str(note), "") + xcell.comment = comment + class OpenpyxlReader(BaseExcelReader["Workbook"]): @doc(storage_options=_shared_docs["storage_options"]) diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index b2fd24a670300..eeb318eadda5c 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -20,6 +20,8 @@ WriteExcelBuffer, ) + from pandas.core.frame import DataFrame + class _XlsxStyler: # Map from openpyxl-oriented 
styles to flatter xlsxwriter representation @@ -245,6 +247,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + notes: DataFrame | None = None, ) -> None: # Write the frame cells using xlsxwriter. sheet_name = self._get_sheet_name(sheet_name) @@ -258,6 +261,10 @@ def _write_cells( if validate_freeze_panes(freeze_panes): wks.freeze_panes(*(freeze_panes)) + notes_col = None + if notes is not None and cells is not None: + notes_col = startcol + next(cells).col + for cell in cells: val, fmt = self._value_with_fmt(cell.val) @@ -282,3 +289,16 @@ def _write_cells( ) else: wks.write(startrow + cell.row, startcol + cell.col, val, style) + + if notes is None or notes_col is None: + return + + for row_idx, row in enumerate(notes.itertuples(index=False)): + for col_idx, note in enumerate(row): + if note == "": + continue + wks.write_comment( + row_idx + 1, # first row has columns + col_idx + notes_col, # n columns with indexes + str(note), + ) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 52b5755558900..894e506b40961 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -557,7 +557,10 @@ def __init__( ) -> None: self.rowcounter = 0 self.na_rep = na_rep + self.notes = None if not isinstance(df, DataFrame): + if df.tooltips is not None: + self.notes = df.tooltips.tt_data self.styler = df self.styler._compute() # calculate applied styles df = df.data @@ -954,6 +957,7 @@ def write( startrow=startrow, startcol=startcol, freeze_panes=freeze_panes, + notes=self.notes, ) finally: # make sure to close opened file handles diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 6f4c2fa6c6eae..7cec2e0af1d28 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -83,6 +83,7 @@ from pandas import ExcelWriter +import textwrap #### # Shared Doc Strings @@ -538,6 +539,19 @@ def set_tooltips( storage_options=_shared_docs["storage_options"], 
storage_options_versionadded="1.5.0", extra_parameters="", + extra_examples=textwrap.dedent( + """\ + If you wish to write excel notes to the workbook, you can do so by + passing a DataFrame to ``set_tooltips``. This process is independent + from writing data to the workbook, therefore both DataFrames can have + different dimensions. + + >>> notes = pd.DataFrame( + ... [["cell 1", "cell 2"], ["cell 3", "cell 4"]], + ... ) # doctest: +SKIP + >>> df1.style.set_tooltips(notes).to_excel("output.xlsx") # doctest: +SKIP + """ + ), ) def to_excel( self, diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 482b331332462..cea2604af747f 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -41,6 +41,36 @@ def get_exp_unit(path: str) -> str: return "us" +def read_notes(tmp_excel) -> DataFrame: + from openpyxl import load_workbook + + workbook = load_workbook(tmp_excel) + sheet = workbook["Sheet1"] + data_notes = [] + + for row in sheet.rows: + row_notes = [cell.comment.content if cell.comment else "" for cell in row] + data_notes.append(row_notes) + + # trimming trailing empty rows and columns + while data_notes and all(cell == "" for cell in data_notes[0]): + data_notes.pop(0) + + while data_notes and all(cell == "" for cell in data_notes[-1]): + data_notes.pop() + + while data_notes and all(cell == "" for cell in [row[0] for row in data_notes]): + for row in data_notes: + row.pop(0) + + while data_notes and all(cell == "" for cell in [row[-1] for row in data_notes]): + for row in data_notes: + row.pop() + + notes_df = DataFrame(data_notes) + return notes_df + + @pytest.fixture def frame(float_frame): """ @@ -333,6 +363,146 @@ def test_multiindex_interval_datetimes(self, tmp_excel): ) tm.assert_frame_equal(result, expected) + def test_write_with_notes(self, tmp_excel, ext): + if ext in ["xlsm", "xlsx"]: + expected = DataFrame( + [ + ["note 1", "note 2", "note 3"], + ["note 1", "note 2", 
"note 3"], + ["note 1", "note 2", "note 3"], + ] + ) + df = DataFrame( + [ + [1, 100, 200], + [2, 200, 300], + [3, 300, 400], + ] + ) + df.style.set_tooltips(expected).to_excel(tmp_excel) + result = read_notes(tmp_excel) + tm.assert_frame_equal(result, expected) + + def test_read_write_with_notes_trim_rows(self, tmp_excel, ext): + if ext in ["xlsm", "xlsx"]: + expected = DataFrame( + [ + ["", "", ""], + ["note 1", "note 2", "note3"], + ["", "", ""], + ] + ) + df = DataFrame( + [ + [1, 100, 200], + [2, 200, 300], + [3, 300, 400], + ] + ) + df.style.set_tooltips(expected).to_excel(tmp_excel) + result = read_notes(tmp_excel) + tm.assert_frame_equal(result, expected) + + def test_read_write_with_notes_trim_columns(self, tmp_excel, ext): + if ext in ["xlsm", "xlsx"]: + expected = DataFrame( + [ + ["", "note 2", ""], + ["", "note 2", ""], + ["", "note 2", ""], + ] + ) + df = DataFrame( + [ + [1, 100, 200], + [2, 200, 300], + [3, 300, 400], + ] + ) + df.style.set_tooltips(expected).to_excel(tmp_excel) + result = read_notes(tmp_excel) + tm.assert_frame_equal(result, expected) + + def test_read_write_with_notes_trim_rows_and_columns(self, tmp_excel, ext): + if ext in ["xlsm", "xlsx"]: + expected = DataFrame( + [ + ["", "", ""], + ["", "note 2", ""], + ["", "", ""], + ] + ) + df = DataFrame( + [ + [1, 100, 200], + [2, 200, 300], + [3, 300, 400], + ] + ) + df.style.set_tooltips(expected).to_excel(tmp_excel) + result = read_notes(tmp_excel) + tm.assert_frame_equal(result, expected) + + def test_read_write_with_notes_empty_comments_no_trim(self, tmp_excel, ext): + if ext in ["xlsm", "xlsx"]: + expected = DataFrame( + [ + ["note 1", "", ""], + ["", "", ""], + ["", "", "note 3"], + ] + ) + df = DataFrame( + [ + [1, 100, 200], + [2, 200, 300], + [3, 300, 400], + ] + ) + df.style.set_tooltips(expected).to_excel(tmp_excel) + result = read_notes(tmp_excel) + tm.assert_frame_equal(result, expected) + + def test_read_write_with_notes_smaller_dimensions(self, tmp_excel, ext): + if ext 
in ["xlsm", "xlsx"]: + expected = DataFrame( + [ + ["note 1", "note 2"], + ["note 1", "note 2"], + ] + ) + df = DataFrame( + [ + [1, 100, 200], + [2, 200, 300], + [3, 300, 400], + ] + ) + df.style.set_tooltips(expected).to_excel(tmp_excel) + result = read_notes(tmp_excel) + tm.assert_frame_equal(result, expected) + + def test_read_write_with_notes_bigger_dimensions(self, tmp_excel, ext): + if ext in ["xlsm", "xlsx"]: + expected = DataFrame( + [ + ["note 1", "note 2", "note 3", "note 4"], + ["note 1", "note 2", "note 3", "note 4"], + ["note 1", "note 2", "note 3", "note 4"], + ["note 1", "note 2", "note 3", "note 4"], + ] + ) + df = DataFrame( + [ + [1, 100, 200], + [2, 200, 300], + [3, 300, 400], + ] + ) + df.style.set_tooltips(expected).to_excel(tmp_excel) + result = read_notes(tmp_excel) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "engine,ext",