Skip to content

Commit 948deac

Browse files
ENH: Add support to read notes from excel (openpyxl engine) (pandas-dev#58070)
Co-authored-by: Dacops <[email protected]>
1 parent 5b9d700 commit 948deac

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

pandas/io/excel/_base.py

+4
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,10 @@ def read_excel(
510510
if engine_kwargs is None:
511511
engine_kwargs = {}
512512

513+
# set read_only to False so cells have a comment attribute
514+
if notes is not None and engine == "openpyxl":
515+
engine_kwargs = {"read_only": False}
516+
513517
if not isinstance(io, ExcelFile):
514518
should_close = True
515519
io = ExcelFile(

pandas/io/excel/_openpyxl.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
WriteExcelBuffer,
3838
)
3939

40+
from pandas.core.frame import DataFrame
41+
4042

4143
class OpenpyxlWriter(ExcelWriter):
4244
_engine = "openpyxl"
@@ -626,11 +628,43 @@ def _convert_cell(self, cell) -> Scalar:
626628
return cell.value
627629

628630
def get_sheet_data(
629-
self, sheet, file_rows_needed: int | None = None
631+
self, sheet, file_rows_needed: int | None = None, notes: DataFrame | None = None
630632
) -> list[list[Scalar]]:
631633
if self.book.read_only:
632634
sheet.reset_dimensions()
633635

636+
if notes is not None:
637+
data_notes = []
638+
for row in sheet.rows:
639+
row_notes = [
640+
cell.comment.content if cell.comment else "" for cell in row
641+
]
642+
data_notes.append(row_notes)
643+
644+
# trim leading and trailing empty rows and columns
645+
while data_notes and all(cell == "" for cell in data_notes[0]):
646+
data_notes.pop(0)
647+
648+
while data_notes and all(cell == "" for cell in data_notes[-1]):
649+
data_notes.pop()
650+
651+
while data_notes and all(
652+
cell == "" for cell in [row[0] for row in data_notes]
653+
):
654+
for row in data_notes:
655+
row.pop(0)
656+
657+
while data_notes and all(
658+
cell == "" for cell in [row[-1] for row in data_notes]
659+
):
660+
for row in data_notes:
661+
row.pop()
662+
663+
notes_df = DataFrame(data_notes)
664+
665+
for col in notes_df.columns:
666+
notes[col] = notes_df[col]
667+
634668
data: list[list[Scalar]] = []
635669
last_row_with_data = -1
636670
for row_number, row in enumerate(sheet.rows):

0 commit comments

Comments
 (0)