pandas-dev · jreback · May 6, 2021 · Apr 26, 2021 · Apr 29, 2021 · Apr 29, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -795,6 +795,7 @@ I/O
 - Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`)
 - Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
 - Bug in :func:`read_excel` dropping empty values from single-column spreadsheets (:issue:`39808`)
+- Bug in :func:`read_excel` loading trailing empty rows/columns for some filetypes (:issue:`41167`)
 - Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`)
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
 - Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -571,15 +571,18 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
         last_row_with_data = -1
         for row_number, row in enumerate(sheet.rows):
             converted_row = [self._convert_cell(cell, convert_float) for cell in row]
-            if not all(cell == "" for cell in converted_row):
+            while converted_row and converted_row[-1] == "":
+                # trim trailing empty elements
+                converted_row.pop()
+            if converted_row:
                 last_row_with_data = row_number
             data.append(converted_row)
 
         # Trim trailing empty rows
         data = data[: last_row_with_data + 1]
 
-        if self.book.read_only and len(data) > 0:
-            # With dimension reset, openpyxl no longer pads rows
+        if len(data) > 0:
+            # extend rows to max width
             max_width = max(len(data_row) for data_row in data)
             if min(len(data_row) for data_row in data) < max_width:
                 empty_cell: list[Scalar] = [""]

diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py
@@ -75,7 +75,27 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
         return cell.v
 
     def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
-        return [
-            [self._convert_cell(c, convert_float) for c in r]
-            for r in sheet.rows(sparse=False)
-        ]
+        data: list[list[Scalar]] = []
+        previous_row_number = -1
+        # When sparse=True the rows can have different lengths and empty rows are
+        # not returned. The cells are namedtuples of row, col, value (r, c, v).
+        for row in sheet.rows(sparse=True):
+            row_number = row[0].r
+            converted_row = [self._convert_cell(cell, convert_float) for cell in row]
+            while converted_row and converted_row[-1] == "":
+                # trim trailing empty elements
+                converted_row.pop()
+            if converted_row:
+                data.extend([[]] * (row_number - previous_row_number - 1))
+                data.append(converted_row)
+                previous_row_number = row_number
+        if data:
+            # pad shorter rows with empty cells so every row has max_width entries
+            max_width = max(len(data_row) for data_row in data)
+            if min(len(data_row) for data_row in data) < max_width:
+                empty_cell: list[Scalar] = [""]
+                data = [
+                    data_row + (max_width - len(data_row)) * empty_cell
+                    for data_row in data
+                ]
+        return data
diff --git a/pandas/tests/io/data/excel/trailing_blanks.ods b/pandas/tests/io/data/excel/trailing_blanks.ods
diff --git a/pandas/tests/io/data/excel/trailing_blanks.xls b/pandas/tests/io/data/excel/trailing_blanks.xls
diff --git a/pandas/tests/io/data/excel/trailing_blanks.xlsb b/pandas/tests/io/data/excel/trailing_blanks.xlsb
diff --git a/pandas/tests/io/data/excel/trailing_blanks.xlsm b/pandas/tests/io/data/excel/trailing_blanks.xlsm
diff --git a/pandas/tests/io/data/excel/trailing_blanks.xlsx b/pandas/tests/io/data/excel/trailing_blanks.xlsx
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -1204,6 +1204,15 @@ def test_multiheader_two_blank_lines(self, read_ext):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_trailing_blanks(self, read_ext):
+        """
+        Sheets can contain blank cells with no data. Some of our readers
+        were including those cells, creating many empty rows and columns.
+        """
+        file_name = "trailing_blanks" + read_ext
+        result = pd.read_excel(file_name)
+        assert result.shape == (3, 3)
+
 
 class TestExcelFileRead:
     @pytest.fixture(autouse=True)