Skip to content

Support writing/reading notes to/from excel files #58831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Other enhancements
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
- :func:`DataFrame.to_excel` now supports writing notes to Excel files via :meth:`Styler.set_tooltips` (:issue:`58070`)
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
- :meth:`DataFrame.ewm` now allows ``adjust=False`` when ``times`` is provided (:issue:`54328`)
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2126,10 +2126,11 @@ def _repr_data_resource_(self):
storage_options_versionadded="1.2.0",
extra_parameters=textwrap.dedent(
"""\
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
),
extra_examples="",
)
def to_excel(
self,
Expand Down Expand Up @@ -2261,6 +2262,8 @@ def to_excel(
automatically chosen depending on the file extension):

>>> df1.to_excel("output1.xlsx", engine="xlsxwriter") # doctest: +SKIP
{extra_examples}
End of examples.
"""
if engine_kwargs is None:
engine_kwargs = {}
Expand Down
3 changes: 3 additions & 0 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,7 @@ def _write_cells(
startrow: int = 0,
startcol: int = 0,
freeze_panes: tuple[int, int] | None = None,
notes: DataFrame | None = None,
) -> None:
"""
Write given formatted cells into an Excel sheet
Expand All @@ -1220,6 +1221,8 @@ def _write_cells(
----------
cells : generator
cell of formatted data to save to Excel sheet
notes : DataFrame, optional
DataFrame containing notes to be written to the Excel sheet
sheet_name : str, default None
Name of Excel sheet, if None, then use self.cur_sheet
startrow : upper left cell row to dump data frame
Expand Down
11 changes: 11 additions & 0 deletions pandas/io/excel/_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
WriteExcelBuffer,
)

from pandas.core.frame import DataFrame

from pandas.io.formats.excel import ExcelCell


Expand Down Expand Up @@ -99,6 +101,7 @@ def _write_cells(
startrow: int = 0,
startcol: int = 0,
freeze_panes: tuple[int, int] | None = None,
notes: DataFrame | None = None,
) -> None:
"""
Write the frame cells using odf
Expand All @@ -110,6 +113,14 @@ def _write_cells(
)
from odf.text import P

if notes is not None:
raise NotImplementedError(
"""
Notes are not supported by the odswriter engine,
see https://github.com/eea/odfpy
"""
Comment on lines +118 to +121
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be a regular string, "Notes are not...", not a triple quoted string.

)

sheet_name = self._get_sheet_name(sheet_name)
assert sheet_name is not None

Expand Down
23 changes: 23 additions & 0 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
WriteExcelBuffer,
)

from pandas.core.frame import DataFrame


class OpenpyxlWriter(ExcelWriter):
_engine = "openpyxl"
Expand Down Expand Up @@ -447,7 +449,10 @@ def _write_cells(
startrow: int = 0,
startcol: int = 0,
freeze_panes: tuple[int, int] | None = None,
notes: DataFrame | None = None,
) -> None:
from openpyxl.comments import Comment

# Write the frame cells using openpyxl.
sheet_name = self._get_sheet_name(sheet_name)

Expand Down Expand Up @@ -484,6 +489,10 @@ def _write_cells(
row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
)

notes_col = None
if notes is not None and cells is not None:
notes_col = startcol + next(cells).col + 1

for cell in cells:
xcell = wks.cell(
row=startrow + cell.row + 1, column=startcol + cell.col + 1
Expand Down Expand Up @@ -530,6 +539,20 @@ def _write_cells(
for k, v in style_kwargs.items():
setattr(xcell, k, v)

if notes is None or notes_col is None:
return

for row_idx, val in enumerate(notes.itertuples(index=False)):
for col_idx, note in enumerate(val):
xcell = wks.cell(
# first row has columns and openpyxl starts counting at 1, not 0
row=row_idx + 2,
column=col_idx + notes_col, # n columns with indexes
)
if note:
comment = Comment(str(note), "")
xcell.comment = comment


class OpenpyxlReader(BaseExcelReader["Workbook"]):
@doc(storage_options=_shared_docs["storage_options"])
Expand Down
20 changes: 20 additions & 0 deletions pandas/io/excel/_xlsxwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
WriteExcelBuffer,
)

from pandas.core.frame import DataFrame


class _XlsxStyler:
# Map from openpyxl-oriented styles to flatter xlsxwriter representation
Expand Down Expand Up @@ -245,6 +247,7 @@ def _write_cells(
startrow: int = 0,
startcol: int = 0,
freeze_panes: tuple[int, int] | None = None,
notes: DataFrame | None = None,
) -> None:
# Write the frame cells using xlsxwriter.
sheet_name = self._get_sheet_name(sheet_name)
Expand All @@ -258,6 +261,10 @@ def _write_cells(
if validate_freeze_panes(freeze_panes):
wks.freeze_panes(*(freeze_panes))

notes_col = None
if notes is not None and cells is not None:
notes_col = startcol + next(cells).col

for cell in cells:
val, fmt = self._value_with_fmt(cell.val)

Expand All @@ -282,3 +289,16 @@ def _write_cells(
)
else:
wks.write(startrow + cell.row, startcol + cell.col, val, style)

if notes is None or notes_col is None:
return

for row_idx, row in enumerate(notes.itertuples(index=False)):
for col_idx, note in enumerate(row):
if note == "":
continue
wks.write_comment(
row_idx + 1, # first row has columns
col_idx + notes_col, # n columns with indexes
str(note),
)
4 changes: 4 additions & 0 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,10 @@ def __init__(
) -> None:
self.rowcounter = 0
self.na_rep = na_rep
self.notes = None
if not isinstance(df, DataFrame):
if df.tooltips is not None:
self.notes = df.tooltips.tt_data
self.styler = df
self.styler._compute() # calculate applied styles
df = df.data
Expand Down Expand Up @@ -954,6 +957,7 @@ def write(
startrow=startrow,
startcol=startcol,
freeze_panes=freeze_panes,
notes=self.notes,
)
finally:
# make sure to close opened file handles
Expand Down
14 changes: 14 additions & 0 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@

from pandas import ExcelWriter

import textwrap

####
# Shared Doc Strings
Expand Down Expand Up @@ -538,6 +539,19 @@ def set_tooltips(
storage_options=_shared_docs["storage_options"],
storage_options_versionadded="1.5.0",
extra_parameters="",
extra_examples=textwrap.dedent(
"""\
If you wish to write excel notes to the workbook, you can do so by
passing a DataFrame to ``set_tooltips``. This process is independent
from writing data to the workbook, therefore both DataFrames can have
different dimensions.

>>> notes = pd.DataFrame(
... [["cell 1", "cell 2"], ["cell 3", "cell 4"]],
... ) # doctest: +SKIP
>>> df1.style.set_tooltips(notes).to_excel("output.xlsx") # doctest: +SKIP
"""
),
)
def to_excel(
self,
Expand Down
Loading