Skip to content

Commit bc7b01f

Browse files
CLN: Simplify _xlrd engine method to get notes
Co-authored-by: diogomsmiranda <[email protected]>
1 parent 780c3d1 commit bc7b01f

File tree

2 files changed

+11
-16
lines changed

2 files changed

+11
-16
lines changed

pandas/io/excel/_base.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@
280280
281281
engine_kwargs : dict, optional
282282
Arbitrary keyword arguments passed to excel engine.
283-
283+
284284
notes : DataFrame, default None
285285
A DataFrame to hold the notes extracted from the Excel file.
286286
@@ -358,12 +358,12 @@
358358
2 None NaN
359359
360360
To get the comments of the Excel input file, pass a ``notes`` DataFrame.
361-
This new DataFrame might have different dimensions than the data returned
362-
DataFrame since it'll only read the columns which cells have notes.
363-
(last column with note - first column with note + 1) *
361+
This DataFrame might have different dimensions than the DataFrame returned
362+
by ``read_excel`` since it'll only read the cells with notes. This DataFrame's
363+
dimensions will be: (last column with note - first column with note + 1) *
364364
(last row with note - first row with note + 1).
365365
366-
Cells with no notes will have an empty string ("").
366+
Cells with no notes inside these limits will have an empty string ("").
367367
368368
If the data in the ``tmp.xlsx`` file was written using the
369369
``set_tooltips(notes)`` method of ``Styler.to_excel``, like in
@@ -377,7 +377,7 @@
377377
378378
>>> df_notes = pd.DataFrame() # doctest: +SKIP
379379
380-
>>> pd.read_excel('tmp.xlsx', df_notes) # doctest: +SKIP
380+
>>> pd.read_excel('tmp.xlsx', notes=df_notes) # doctest: +SKIP
381381
Name Value
382382
0 string1 1
383383
1 string2 2
@@ -625,7 +625,9 @@ def get_sheet_by_name(self, name: str):
625625
def get_sheet_by_index(self, index: int):
626626
raise NotImplementedError
627627

628-
def get_sheet_data(self, sheet, rows: int | None = None, notes: DataFrame | None = None):
628+
def get_sheet_data(
629+
self, sheet, rows: int | None = None, notes: DataFrame | None = None
630+
):
629631
raise NotImplementedError
630632

631633
def raise_if_bad_sheet_by_index(self, index: int) -> None:

pandas/io/excel/_xlrd.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def _parse_cell(cell_contents, cell_typ):
135135
nrows = sheet.nrows
136136
if file_rows_needed is not None:
137137
nrows = min(nrows, file_rows_needed)
138-
138+
139139
if notes is not None:
140140
notes_locations = dict(sheet.cell_note_map.items())
141141

@@ -144,10 +144,6 @@ def _parse_cell(cell_contents, cell_typ):
144144
min_x = min(location[1] for location in notes_locations.keys())
145145
max_x = max(location[1] for location in notes_locations.keys())
146146

147-
# Create column headers
148-
columns = [str(i) for i in range(min_x, max_x + 1)]
149-
150-
# Create empty rows
151147
data_notes = []
152148
for y in range(min_y, max_y + 1):
153149
row = []
@@ -158,10 +154,8 @@ def _parse_cell(cell_contents, cell_typ):
158154
row.append("")
159155
data_notes.append(row)
160156

161-
# Convert data_notes to DataFrame and set columns
162-
notes_df = DataFrame(data_notes, columns=columns)
157+
notes_df = DataFrame(data_notes)
163158

164-
# Update the notes DataFrame with the new data
165159
for col in notes_df.columns:
166160
notes[col] = notes_df[col]
167161

@@ -173,4 +167,3 @@ def _parse_cell(cell_contents, cell_typ):
173167
data.append(row)
174168

175169
return data
176-

0 commit comments

Comments
 (0)