CLN: Simplify _xlrd engine method to get notes (pandas-dev#58070)

Dacops · diogomsmiranda · Dacops · commit f6d63877309a · 2024-05-25T01:04:09.000Z
Co-authored-by: diogomsmiranda <diogomsmiranda@tecnico.ulisboa.pt>
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -280,7 +280,7 @@
 
 engine_kwargs : dict, optional
     Arbitrary keyword arguments passed to excel engine.
-    
+
 notes: DataFrame, default None
     A DataFrame to hold the notes extracted from the Excel file.
 
@@ -358,12 +358,12 @@
 2     None    NaN
 
 To get the comments of the excel input file, pass a ``notes`` DataFrame.
-This new DataFrame might have different dimensions than the data returned
-DataFrame since it'll only read the columns which cells have notes.
-(last column with note - first column with note + 1) *
+This DataFrame might have different dimensions than the DataFrame returned
+by ``read_excel``, since it only covers the range of cells with notes. Its
+dimensions will be: (last column with note - first column with note + 1) *
 (last row with note - first row with note + 1).
 
-Cells with no notes will have an empty string ("").
+Cells with no notes inside these limits will have an empty string ("").
 
 If the data in the ``tmp.xlsx`` file was written using the
 ``set_tooltips(notes)`` method of ``Styler.to_excel``, like in
@@ -377,7 +377,7 @@
 
 >>> df_notes = pd.DataFrame()  # doctest: +SKIP
 
->>> pd.read_excel('tmp.xlsx', df_notes)  # doctest: +SKIP
+>>> pd.read_excel('tmp.xlsx', notes=df_notes)  # doctest: +SKIP
        Name   Value
 0   string1       1
 1   string2       2
@@ -625,7 +625,9 @@ def get_sheet_by_name(self, name: str):
     def get_sheet_by_index(self, index: int):
         raise NotImplementedError
 
-    def get_sheet_data(self, sheet, rows: int | None = None, notes: DataFrame | None = None):
+    def get_sheet_data(
+        self, sheet, rows: int | None = None, notes: DataFrame | None = None
+    ):
         raise NotImplementedError
 
     def raise_if_bad_sheet_by_index(self, index: int) -> None:
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
@@ -133,7 +133,7 @@ def _parse_cell(cell_contents, cell_typ):
         nrows = sheet.nrows
         if file_rows_needed is not None:
             nrows = min(nrows, file_rows_needed)
-                
+
         if notes is not None:
             notes_locations = dict(sheet.cell_note_map.items())
 
@@ -142,10 +142,6 @@ def _parse_cell(cell_contents, cell_typ):
             min_x = min(location[1] for location in notes_locations.keys())
             max_x = max(location[1] for location in notes_locations.keys())
 
-            # Create column headers
-            columns = [str(i) for i in range(min_x, max_x + 1)]
-
-            # Create empty rows
             data_notes = []
             for y in range(min_y, max_y + 1):
                 row = []
@@ -156,10 +152,8 @@ def _parse_cell(cell_contents, cell_typ):
                         row.append("")
                 data_notes.append(row)
 
-            # Convert data_notes to DataFrame and set columns
-            notes_df = DataFrame(data_notes, columns=columns)
+            notes_df = DataFrame(data_notes)
 
-            # Update the notes DataFrame with the new data
             for col in notes_df.columns:
                 notes[col] = notes_df[col]
 
@@ -170,4 +164,3 @@ def _parse_cell(cell_contents, cell_typ):
             ]
             for i in range(nrows)
         ]
-