Skip to content

Commit f6d6387

Browse files
CLN: Simplify _xlrd engine method to get notes (pandas-dev#58070)
Co-authored-by: diogomsmiranda <[email protected]>
1 parent 5b7390b commit f6d6387

File tree

2 files changed

+11
-16
lines changed

2 files changed

+11
-16
lines changed

pandas/io/excel/_base.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@
280280
281281
engine_kwargs : dict, optional
282282
Arbitrary keyword arguments passed to excel engine.
283-
283+
284284
notes: DataFrame, default None
285285
A DataFrame to hold the notes extracted from the Excel file.
286286
@@ -358,12 +358,12 @@
358358
2 None NaN
359359
360360
To get the comments of the Excel input file, pass a ``notes`` DataFrame.
361-
This new DataFrame might have different dimensions than the data returned
362-
DataFrame since it'll only read the columns which cells have notes.
363-
(last column with note - first column with note + 1) *
361+
This DataFrame might have different dimensions than the DataFrame returned
362+
by ``read_excel`` since it'll only read the cells with notes. Its
363+
dimensions will be: (last column with note - first column with note + 1) *
364364
(last row with note - first row with note + 1).
365365
366-
Cells with no notes will have an empty string ("").
366+
Cells with no notes inside these limits will have an empty string ("").
367367
368368
If the data in the ``tmp.xlsx`` file was written using the
369369
``set_tooltips(notes)`` method of ``Styler.to_excel``, like in
@@ -377,7 +377,7 @@
377377
378378
>>> df_notes = pd.DataFrame() # doctest: +SKIP
379379
380-
>>> pd.read_excel('tmp.xlsx', df_notes) # doctest: +SKIP
380+
>>> pd.read_excel('tmp.xlsx', notes=df_notes) # doctest: +SKIP
381381
Name Value
382382
0 string1 1
383383
1 string2 2
@@ -625,7 +625,9 @@ def get_sheet_by_name(self, name: str):
625625
def get_sheet_by_index(self, index: int):
626626
raise NotImplementedError
627627

628-
def get_sheet_data(self, sheet, rows: int | None = None, notes: DataFrame | None = None):
628+
def get_sheet_data(
629+
self, sheet, rows: int | None = None, notes: DataFrame | None = None
630+
):
629631
raise NotImplementedError
630632

631633
def raise_if_bad_sheet_by_index(self, index: int) -> None:

pandas/io/excel/_xlrd.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def _parse_cell(cell_contents, cell_typ):
133133
nrows = sheet.nrows
134134
if file_rows_needed is not None:
135135
nrows = min(nrows, file_rows_needed)
136-
136+
137137
if notes is not None:
138138
notes_locations = dict(sheet.cell_note_map.items())
139139

@@ -142,10 +142,6 @@ def _parse_cell(cell_contents, cell_typ):
142142
min_x = min(location[1] for location in notes_locations.keys())
143143
max_x = max(location[1] for location in notes_locations.keys())
144144

145-
# Create column headers
146-
columns = [str(i) for i in range(min_x, max_x + 1)]
147-
148-
# Create empty rows
149145
data_notes = []
150146
for y in range(min_y, max_y + 1):
151147
row = []
@@ -156,10 +152,8 @@ def _parse_cell(cell_contents, cell_typ):
156152
row.append("")
157153
data_notes.append(row)
158154

159-
# Convert data_notes to DataFrame and set columns
160-
notes_df = DataFrame(data_notes, columns=columns)
155+
notes_df = DataFrame(data_notes)
161156

162-
# Update the notes DataFrame with the new data
163157
for col in notes_df.columns:
164158
notes[col] = notes_df[col]
165159

@@ -170,4 +164,3 @@ def _parse_cell(cell_contents, cell_typ):
170164
]
171165
for i in range(nrows)
172166
]
173-

0 commit comments

Comments
 (0)