From 900afff344ad9e2df7d46c74cc23b2f787b149f9 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 27 Mar 2020 19:56:24 +0100
Subject: [PATCH 01/66] ENH: Skip rows while reading excel file with
 engine=openpyxl

---
 pandas/io/excel/_base.py      |  4 ++--
 pandas/io/excel/_odfreader.py |  2 +-
 pandas/io/excel/_openpyxl.py  | 22 +++++++++++++++++++---
 pandas/io/excel/_pyxlsb.py    |  2 +-
 pandas/io/excel/_xlrd.py      |  2 +-
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index f98d9501f1f73..09977039521c7 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -380,7 +380,7 @@ def get_sheet_by_index(self, index):
         pass
 
     @abc.abstractmethod
-    def get_sheet_data(self, sheet, convert_float):
+    def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
         pass
 
     def parse(
@@ -436,7 +436,7 @@ def parse(
             else:  # assume an integer if not a string
                 sheet = self.get_sheet_by_index(asheetname)
 
-            data = self.get_sheet_data(sheet, convert_float)
+            data = self.get_sheet_data(sheet, convert_float, header, skiprows, nrows)
             usecols = _maybe_convert_usecols(usecols)
 
             if not data:
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 7af776dc1a10f..edd57a4aba0be 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -63,7 +63,7 @@ def get_sheet_by_name(self, name: str):
 
         raise ValueError(f"sheet {name} not found")
 
-    def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
+    def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
         """
         Parse an ODF Table into a list of lists
         """
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index a96c0f814e2d8..61a563c05bd56 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -524,9 +524,25 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
         return cell.value
 
-    def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
+    def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
-        for row in sheet.rows:
-            data.append([self._convert_cell(cell, convert_float) for cell in row])
+        skiprows = 0 if skiprows is None else skiprows
+        header = 0 if header is None else header
+
+        if nrows is not None:
+            for row in sheet.rows:
+                if header > 1:
+                    header -= 1
+                    data.append(["", ""])
+                elif skiprows > 0:
+                    skiprows -= 1
+                    data.append(["", ""])
+                elif nrows >= 0:
+                    nrows -= 1
+                else:
+                    break
+        else:
+            for row in sheet.rows:
+                data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data
diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py
index 0d96c8c4acdb8..23385bcf60d9c 100644
--- a/pandas/io/excel/_pyxlsb.py
+++ b/pandas/io/excel/_pyxlsb.py
@@ -62,7 +62,7 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
         return cell.v
 
-    def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
+    def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
         return [
             [self._convert_cell(c, convert_float) for c in r]
             for r in sheet.rows(sparse=False)
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 8f7d3b1368fc7..afdd40d747c49 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -46,7 +46,7 @@ def get_sheet_by_name(self, name):
     def get_sheet_by_index(self, index):
         return self.book.sheet_by_index(index)
 
-    def get_sheet_data(self, sheet, convert_float):
+    def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
         from xlrd import (
             xldate,
             XL_CELL_DATE,

From df55b51320eb695830ed9846b2d03c283727dfe5 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 4 Apr 2020 02:07:03 +0200
Subject: [PATCH 02/66] ENH: Skipping rows with odf engine

---
 pandas/io/excel/_odfreader.py | 17 +++++++++++++++++
 pandas/io/excel/_openpyxl.py  | 34 ++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index edd57a4aba0be..e79a186e30673 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -78,8 +78,25 @@ def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) ->
         max_row_len = 0
 
         table: List[List[Scalar]] = []
+        header = 0 if header is None else header
+        skiprows = 0 if skiprows is None else skiprows
+        nrows = 0 if nrows is None else nrows
 
         for i, sheet_row in enumerate(sheet_rows):
+
+            if header > 1:
+                header -= 1
+                table.append([])
+                continue
+            elif skiprows > 0:
+                skiprows -= 1
+                table.append([])
+                continue
+            elif nrows >= 0:
+                nrows -= 1
+            else: 
+                break
+
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
             table_row: List[Scalar] = []
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 61a563c05bd56..2e8c478132890 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -526,23 +526,25 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
     def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
-        skiprows = 0 if skiprows is None else skiprows
         header = 0 if header is None else header
+        skiprows = 0 if skiprows is None else skiprows
+        nrows = 0 if nrows is None else nrows
+        
+        for row in sheet.rows: 
+
+            if header > 1:
+                header -= 1
+                data.append([])
+                continue
+            elif skiprows > 0:
+                skiprows -= 1
+                data.append([])
+                continue
+            elif nrows >= 0:
+                nrows -= 1
+            else:
+                break
 
-        if nrows is not None:
-            for row in sheet.rows:
-                if header > 1:
-                    header -= 1
-                    data.append(["", ""])
-                elif skiprows > 0:
-                    skiprows -= 1
-                    data.append(["", ""])
-                elif nrows >= 0:
-                    nrows -= 1
-                else:
-                    break
-        else:
-            for row in sheet.rows:
-                data.append([self._convert_cell(cell, convert_float) for cell in row])
+            data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From 817702435117118a7182afc7899d585fdc479861 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 4 Apr 2020 03:02:42 +0200
Subject: [PATCH 03/66] ENH: Optimize nrows in read_excel

---
 pandas/io/excel/_odfreader.py | 31 ++++++++++++++++++-------------
 pandas/io/excel/_openpyxl.py  | 32 ++++++++++++++++++--------------
 pandas/io/excel/_xlrd.py      | 22 +++++++++++++++++++++-
 3 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index e79a186e30673..70bdba1942aa2 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -6,6 +6,7 @@
 import pandas as pd
 
 from pandas.io.excel._base import _BaseExcelReader
+from pandas.io.parsers import _validate_integer
 
 
 class _ODFReader(_BaseExcelReader):
@@ -78,24 +79,28 @@ def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) ->
         max_row_len = 0
 
         table: List[List[Scalar]] = []
+
+        if nrows is not None: _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
-        nrows = 0 if nrows is None else nrows
+        if isinstance(header, list) or isinstance(skiprows, list):
+            nrows = None
 
         for i, sheet_row in enumerate(sheet_rows):
 
-            if header > 1:
-                header -= 1
-                table.append([])
-                continue
-            elif skiprows > 0:
-                skiprows -= 1
-                table.append([])
-                continue
-            elif nrows >= 0:
-                nrows -= 1
-            else: 
-                break
+            if nrows is not None:
+                if header > 1:
+                    header -= 1
+                    data.append([])
+                    continue
+                elif skiprows > 0:
+                    skiprows -= 1
+                    data.append([])
+                    continue
+                if nrows >= 0:
+                    nrows -= 1
+                else: 
+                    break
 
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 2e8c478132890..58e096f1269d6 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -7,7 +7,7 @@
 
 from pandas.io.excel._base import ExcelWriter, _BaseExcelReader
 from pandas.io.excel._util import _validate_freeze_panes
-
+from pandas.io.parsers import _validate_integer
 
 class _OpenpyxlWriter(ExcelWriter):
     engine = "openpyxl"
@@ -526,24 +526,28 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
     def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
+
+        if nrows is not None: _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
-        nrows = 0 if nrows is None else nrows
+        if isinstance(header, list) or isinstance(skiprows, list):
+            nrows = None
         
         for row in sheet.rows: 
 
-            if header > 1:
-                header -= 1
-                data.append([])
-                continue
-            elif skiprows > 0:
-                skiprows -= 1
-                data.append([])
-                continue
-            elif nrows >= 0:
-                nrows -= 1
-            else:
-                break
+            if nrows is not None:
+                if header > 1:
+                    header -= 1
+                    data.append([])
+                    continue
+                elif skiprows > 0:
+                    skiprows -= 1
+                    data.append([])
+                    continue
+                if nrows >= 0:
+                    nrows -= 1
+                else: 
+                    break
 
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index afdd40d747c49..f3089e44d6faf 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -5,7 +5,7 @@
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.io.excel._base import _BaseExcelReader
-
+from pandas.io.parsers import _validate_integer
 
 class _XlrdReader(_BaseExcelReader):
     def __init__(self, filepath_or_buffer):
@@ -97,7 +97,27 @@ def _parse_cell(cell_contents, cell_typ):
 
         data = []
 
+        if nrows is not None: _validate_integer("nrows", nrows)
+        header = 0 if header is None else header
+        skiprows = 0 if skiprows is None else skiprows
+        if isinstance(header, list) or isinstance(skiprows, list):
+            nrows = None
         for i in range(sheet.nrows):
+
+            if nrows is not None:
+                if header > 1:
+                    header -= 1
+                    data.append([])
+                    continue
+                elif skiprows > 0:
+                    skiprows -= 1
+                    data.append([])
+                    continue
+                if nrows >= 0:
+                    nrows -= 1
+                else: 
+                    break
+
             row = [
                 _parse_cell(value, typ)
                 for value, typ in zip(sheet.row_values(i), sheet.row_types(i))

From 79b34c3851375d1b46dc2e0215c3e7f5089632d5 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 4 Apr 2020 03:09:17 +0200
Subject: [PATCH 04/66] Reformatted

---
 pandas/io/excel/_odfreader.py |  9 ++++++---
 pandas/io/excel/_openpyxl.py  | 14 +++++++++-----
 pandas/io/excel/_xlrd.py      |  6 ++++--
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 70bdba1942aa2..51e22d156663b 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -64,7 +64,9 @@ def get_sheet_by_name(self, name: str):
 
         raise ValueError(f"sheet {name} not found")
 
-    def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
+    def get_sheet_data(
+        self, sheet, convert_float: bool, header, skiprows, nrows
+    ) -> List[List[Scalar]]:
         """
         Parse an ODF Table into a list of lists
         """
@@ -80,7 +82,8 @@ def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) ->
 
         table: List[List[Scalar]] = []
 
-        if nrows is not None: _validate_integer("nrows", nrows)
+        if nrows is not None:
+            _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
         if isinstance(header, list) or isinstance(skiprows, list):
@@ -99,7 +102,7 @@ def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) ->
                     continue
                 if nrows >= 0:
                     nrows -= 1
-                else: 
+                else:
                     break
 
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 58e096f1269d6..2fad546681a2c 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -9,6 +9,7 @@
 from pandas.io.excel._util import _validate_freeze_panes
 from pandas.io.parsers import _validate_integer
 
+
 class _OpenpyxlWriter(ExcelWriter):
     engine = "openpyxl"
     supported_extensions = (".xlsx", ".xlsm")
@@ -524,16 +525,19 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
         return cell.value
 
-    def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
+    def get_sheet_data(
+        self, sheet, convert_float: bool, header, skiprows, nrows
+    ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        if nrows is not None: _validate_integer("nrows", nrows)
+        if nrows is not None:
+            _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
         if isinstance(header, list) or isinstance(skiprows, list):
             nrows = None
-        
-        for row in sheet.rows: 
+
+        for row in sheet.rows:
 
             if nrows is not None:
                 if header > 1:
@@ -546,7 +550,7 @@ def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) ->
                     continue
                 if nrows >= 0:
                     nrows -= 1
-                else: 
+                else:
                     break
 
             data.append([self._convert_cell(cell, convert_float) for cell in row])
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index f3089e44d6faf..72ff7ceaa37a3 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -7,6 +7,7 @@
 from pandas.io.excel._base import _BaseExcelReader
 from pandas.io.parsers import _validate_integer
 
+
 class _XlrdReader(_BaseExcelReader):
     def __init__(self, filepath_or_buffer):
         """
@@ -97,7 +98,8 @@ def _parse_cell(cell_contents, cell_typ):
 
         data = []
 
-        if nrows is not None: _validate_integer("nrows", nrows)
+        if nrows is not None:
+            _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
         if isinstance(header, list) or isinstance(skiprows, list):
@@ -115,7 +117,7 @@ def _parse_cell(cell_contents, cell_typ):
                     continue
                 if nrows >= 0:
                     nrows -= 1
-                else: 
+                else:
                     break
 
             row = [

From f0a2b8d912b5bb7f5384e1159787a5b7fd46e0e1 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 4 Apr 2020 13:54:25 +0200
Subject: [PATCH 05/66] Fix linting

---
 pandas/io/excel/_odfreader.py | 4 ++--
 pandas/io/excel/_pyxlsb.py    | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 51e22d156663b..bd32d6c2a4d8f 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -94,11 +94,11 @@ def get_sheet_data(
             if nrows is not None:
                 if header > 1:
                     header -= 1
-                    data.append([])
+                    table.append([])
                     continue
                 elif skiprows > 0:
                     skiprows -= 1
-                    data.append([])
+                    table.append([])
                     continue
                 if nrows >= 0:
                     nrows -= 1
diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py
index 23385bcf60d9c..2948eff15a2c0 100644
--- a/pandas/io/excel/_pyxlsb.py
+++ b/pandas/io/excel/_pyxlsb.py
@@ -62,7 +62,9 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
         return cell.v
 
-    def get_sheet_data(self, sheet, convert_float: bool, header, skiprows, nrows) -> List[List[Scalar]]:
+    def get_sheet_data(
+        self, sheet, convert_float: bool, header, skiprows, nrows
+    ) -> List[List[Scalar]]:
         return [
             [self._convert_cell(c, convert_float) for c in r]
             for r in sheet.rows(sparse=False)

From 70ac23483e22752ab8332b5acaf4dabfb0bcae41 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 4 Apr 2020 14:17:09 +0200
Subject: [PATCH 06/66] Add annotation to variable

---
 pandas/io/excel/_xlrd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 72ff7ceaa37a3..55822a40d94d9 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -96,7 +96,7 @@ def _parse_cell(cell_contents, cell_typ):
                     cell_contents = val
             return cell_contents
 
-        data = []
+        data: List[List[Scalar]] = []
 
         if nrows is not None:
             _validate_integer("nrows", nrows)

From 27cae3ad72361f96656743618de32f1a73ef711d Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 4 Apr 2020 14:37:49 +0200
Subject: [PATCH 07/66] Add imports

---
 pandas/io/excel/_xlrd.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 55822a40d94d9..4926540aa5720 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -1,7 +1,9 @@
 from datetime import time
+from typing import List
 
 import numpy as np
 
+from pandas._typing import Scalar
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.io.excel._base import _BaseExcelReader

From 4248f8c3311b4542fc8edf87a83c04ff45de86be Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 5 Apr 2020 01:21:27 +0200
Subject: [PATCH 08/66] Add types

---
 pandas/io/excel/_odfreader.py | 7 ++++---
 pandas/io/excel/_openpyxl.py  | 7 ++++---
 pandas/io/excel/_xlrd.py      | 9 ++++++---
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index bd32d6c2a4d8f..13b2a95dc30e8 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -1,6 +1,6 @@
-from typing import List
+from typing import List, Sequence
 
-from pandas._typing import FilePathOrBuffer, Scalar
+from pandas._typing import FilePathOrBuffer, Scalar, Union
 from pandas.compat._optional import import_optional_dependency
 
 import pandas as pd
@@ -65,7 +65,8 @@ def get_sheet_by_name(self, name: str):
         raise ValueError(f"sheet {name} not found")
 
     def get_sheet_data(
-        self, sheet, convert_float: bool, header, skiprows, nrows
+        self, sheet, convert_float: bool, header: Union[int, Sequence[int]],
+        skiprows: Union[int, Sequence[int]], nrows: int
     ) -> List[List[Scalar]]:
         """
         Parse an ODF Table into a list of lists
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 2fad546681a2c..3c47944d8b397 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -1,8 +1,8 @@
-from typing import List
+from typing import List, Sequence
 
 import numpy as np
 
-from pandas._typing import FilePathOrBuffer, Scalar
+from pandas._typing import FilePathOrBuffer, Scalar, Union
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.io.excel._base import ExcelWriter, _BaseExcelReader
@@ -526,7 +526,8 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
         return cell.value
 
     def get_sheet_data(
-        self, sheet, convert_float: bool, header, skiprows, nrows
+        self, sheet, convert_float: bool, header: Union[int, Sequence[int]],
+        skiprows: Union[int, Sequence[int]], nrows: int
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 4926540aa5720..9ff62f781039b 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -1,9 +1,9 @@
 from datetime import time
-from typing import List
+from typing import List, Sequence
 
 import numpy as np
 
-from pandas._typing import Scalar
+from pandas._typing import Scalar, Union
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.io.excel._base import _BaseExcelReader
@@ -49,7 +49,10 @@ def get_sheet_by_name(self, name):
     def get_sheet_by_index(self, index):
         return self.book.sheet_by_index(index)
 
-    def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
+    def get_sheet_data(
+        self, sheet, convert_float, header: Union[int, Sequence[int]],
+        skiprows: Union[int, Sequence[int]], nrows: int
+    ) -> List[List[Scalar]]:
         from xlrd import (
             xldate,
             XL_CELL_DATE,

From 70f46b3850bd02549b350e94a97dbe41a9159ef7 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Thu, 9 Apr 2020 20:37:59 +0200
Subject: [PATCH 09/66] CLN: Reformat get_sheet_data signatures to one parameter per line

---
 pandas/io/excel/_odfreader.py | 8 ++++++--
 pandas/io/excel/_openpyxl.py  | 8 ++++++--
 pandas/io/excel/_xlrd.py      | 8 ++++++--
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 13b2a95dc30e8..9ee93e55625a2 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -65,8 +65,12 @@ def get_sheet_by_name(self, name: str):
         raise ValueError(f"sheet {name} not found")
 
     def get_sheet_data(
-        self, sheet, convert_float: bool, header: Union[int, Sequence[int]],
-        skiprows: Union[int, Sequence[int]], nrows: int
+        self,
+        sheet,
+        convert_float: bool,
+        header: Union[int, Sequence[int]],
+        skiprows: Union[int, Sequence[int]],
+        nrows: int,
     ) -> List[List[Scalar]]:
         """
         Parse an ODF Table into a list of lists
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 3c47944d8b397..d73382c8cccef 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -526,8 +526,12 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
         return cell.value
 
     def get_sheet_data(
-        self, sheet, convert_float: bool, header: Union[int, Sequence[int]],
-        skiprows: Union[int, Sequence[int]], nrows: int
+        self,
+        sheet,
+        convert_float: bool,
+        header: Union[int, Sequence[int]],
+        skiprows: Union[int, Sequence[int]],
+        nrows: int,
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 9ff62f781039b..b35b5e0d5b667 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -50,8 +50,12 @@ def get_sheet_by_index(self, index):
         return self.book.sheet_by_index(index)
 
     def get_sheet_data(
-        self, sheet, convert_float, header: Union[int, Sequence[int]],
-        skiprows: Union[int, Sequence[int]], nrows: int
+        self,
+        sheet,
+        convert_float,
+        header: Union[int, Sequence[int]],
+        skiprows: Union[int, Sequence[int]],
+        nrows: int,
     ) -> List[List[Scalar]]:
         from xlrd import (
             xldate,

From cdfc05dea1de5478ffcb75860724b479c70c4872 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Thu, 9 Apr 2020 21:38:37 +0200
Subject: [PATCH 10/66] ENH: Mark variables as optional

---
 pandas/io/excel/_odfreader.py | 8 ++++----
 pandas/io/excel/_openpyxl.py  | 8 ++++----
 pandas/io/excel/_xlrd.py      | 8 ++++----
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 9ee93e55625a2..717f87aa15f13 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -1,4 +1,4 @@
-from typing import List, Sequence
+from typing import List, Optional, Sequence
 
 from pandas._typing import FilePathOrBuffer, Scalar, Union
 from pandas.compat._optional import import_optional_dependency
@@ -68,9 +68,9 @@ def get_sheet_data(
         self,
         sheet,
         convert_float: bool,
-        header: Union[int, Sequence[int]],
-        skiprows: Union[int, Sequence[int]],
-        nrows: int,
+        header: Optional[Union[int, Sequence[int]]],
+        skiprows: Optional[Union[int, Sequence[int]]],
+        nrows: Optional[int],
     ) -> List[List[Scalar]]:
         """
         Parse an ODF Table into a list of lists
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index d73382c8cccef..2c99690ed5fc4 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -1,4 +1,4 @@
-from typing import List, Sequence
+from typing import List, Optional, Sequence
 
 import numpy as np
 
@@ -529,9 +529,9 @@ def get_sheet_data(
         self,
         sheet,
         convert_float: bool,
-        header: Union[int, Sequence[int]],
-        skiprows: Union[int, Sequence[int]],
-        nrows: int,
+        header: Optional[Union[int, Sequence[int]]],
+        skiprows: Optional[Union[int, Sequence[int]]],
+        nrows: Optional[int],
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index b35b5e0d5b667..76d5e1337a755 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -1,5 +1,5 @@
 from datetime import time
-from typing import List, Sequence
+from typing import List, Optional, Sequence
 
 import numpy as np
 
@@ -53,9 +53,9 @@ def get_sheet_data(
         self,
         sheet,
         convert_float,
-        header: Union[int, Sequence[int]],
-        skiprows: Union[int, Sequence[int]],
-        nrows: int,
+        header: Optional[Union[int, Sequence[int]]],
+        skiprows: Optional[Union[int, Sequence[int]]],
+        nrows: Optional[int],
     ) -> List[List[Scalar]]:
         from xlrd import (
             xldate,

From 4c8a42a95dac8a48cf97c055e6b8e9196726e4b9 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 10 Apr 2020 02:52:46 +0200
Subject: [PATCH 11/66] ENH: Move nrows variable check

---
 pandas/io/excel/_base.py      | 6 ++++--
 pandas/io/excel/_odfreader.py | 2 --
 pandas/io/excel/_openpyxl.py  | 2 --
 pandas/io/excel/_pyxlsb.py    | 7 ++++---
 pandas/io/excel/_xlrd.py      | 3 +--
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index efb9cc1da1cc0..82e663e48518d 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -28,7 +28,7 @@
     _pop_header_name,
     get_writer,
 )
-from pandas.io.parsers import TextParser
+from pandas.io.parsers import TextParser, _validate_integer
 
 _read_excel_doc = (
     """
@@ -307,6 +307,9 @@ def read_excel(
             "an ExcelFile - ExcelFile already has the engine set"
         )
 
+    if nrows is not None:
+        _validate_integer("nrows", nrows)
+
     return io.parse(
         sheet_name=sheet_name,
         header=header,
@@ -333,7 +336,6 @@ def read_excel(
         **kwds,
     )
 
-
 class _BaseExcelReader(metaclass=abc.ABCMeta):
     def __init__(self, filepath_or_buffer):
         # If filepath_or_buffer is a url, load the data into a BytesIO
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 80ff3a907cd49..e3a51eb8bcf20 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -87,8 +87,6 @@ def get_sheet_data(
 
         table: List[List[Scalar]] = []
 
-        if nrows is not None:
-            _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
         if isinstance(header, list) or isinstance(skiprows, list):
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 100a44ac8e36c..366574f0ddcda 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -540,8 +540,6 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        if nrows is not None:
-            _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
         if isinstance(header, list) or isinstance(skiprows, list):
diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py
index 2948eff15a2c0..51b5d9bb73211 100644
--- a/pandas/io/excel/_pyxlsb.py
+++ b/pandas/io/excel/_pyxlsb.py
@@ -1,6 +1,6 @@
-from typing import List
+from typing import List, Optional, Sequence
 
-from pandas._typing import FilePathOrBuffer, Scalar
+from pandas._typing import FilePathOrBuffer, Scalar, Union
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.io.excel._base import _BaseExcelReader
@@ -63,7 +63,8 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
         return cell.v
 
     def get_sheet_data(
-        self, sheet, convert_float: bool, header, skiprows, nrows
+        self, sheet, convert_float: bool, header: Optional[Union[int, Sequence[int]]],
+        skiprows: Optional[Union[int, Sequence[int]]], nrows: Optional[int]
     ) -> List[List[Scalar]]:
         return [
             [self._convert_cell(c, convert_float) for c in r]
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 76d5e1337a755..7915acfb98b46 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -107,12 +107,11 @@ def _parse_cell(cell_contents, cell_typ):
 
         data: List[List[Scalar]] = []
 
-        if nrows is not None:
-            _validate_integer("nrows", nrows)
         header = 0 if header is None else header
         skiprows = 0 if skiprows is None else skiprows
         if isinstance(header, list) or isinstance(skiprows, list):
             nrows = None
+
         for i in range(sheet.nrows):
 
             if nrows is not None:

From 19bb9275e76f0b270a6f9fa72bd2a03a19d130cb Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 10 Apr 2020 03:31:28 +0200
Subject: [PATCH 12/66] ENH: Remove unused imports

---
 pandas/io/excel/_base.py      | 1 +
 pandas/io/excel/_odfreader.py | 1 -
 pandas/io/excel/_openpyxl.py  | 1 -
 pandas/io/excel/_pyxlsb.py    | 8 ++++++--
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 82e663e48518d..6156487dca755 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -336,6 +336,7 @@ def read_excel(
         **kwds,
     )
 
+
 class _BaseExcelReader(metaclass=abc.ABCMeta):
     def __init__(self, filepath_or_buffer):
         # If filepath_or_buffer is a url, load the data into a BytesIO
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index e3a51eb8bcf20..d15266a2b41a0 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -6,7 +6,6 @@
 import pandas as pd
 
 from pandas.io.excel._base import _BaseExcelReader
-from pandas.io.parsers import _validate_integer
 
 
 class _ODFReader(_BaseExcelReader):
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 366574f0ddcda..58f82cbfb1e7f 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -7,7 +7,6 @@
 
 from pandas.io.excel._base import ExcelWriter, _BaseExcelReader
 from pandas.io.excel._util import _validate_freeze_panes
-from pandas.io.parsers import _validate_integer
 
 
 class _OpenpyxlWriter(ExcelWriter):
diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py
index 51b5d9bb73211..a7962572775b5 100644
--- a/pandas/io/excel/_pyxlsb.py
+++ b/pandas/io/excel/_pyxlsb.py
@@ -63,8 +63,12 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
         return cell.v
 
     def get_sheet_data(
-        self, sheet, convert_float: bool, header: Optional[Union[int, Sequence[int]]],
-        skiprows: Optional[Union[int, Sequence[int]]], nrows: Optional[int]
+        self,
+        sheet,
+        convert_float: bool,
+        header: Optional[Union[int, Sequence[int]]],
+        skiprows: Optional[Union[int, Sequence[int]]],
+        nrows: Optional[int],
     ) -> List[List[Scalar]]:
         return [
             [self._convert_cell(c, convert_float) for c in r]

From 6c2a3b59897af655d883eba1edf7e29497be5134 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 01:36:58 +0200
Subject: [PATCH 13/66] ENH: Move repeated code to base

---
 pandas/io/excel/_base.py      | 19 ++++++++++++++++++-
 pandas/io/excel/_odfreader.py | 26 ++++++++------------------
 pandas/io/excel/_openpyxl.py  | 30 ++++++++++--------------------
 pandas/io/excel/_xlrd.py      | 26 ++++++++------------------
 4 files changed, 44 insertions(+), 57 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 6156487dca755..d98b11bdb20d6 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -389,6 +389,15 @@ def get_sheet_by_index(self, index):
     def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
         pass
 
+    def should_read_row(self, index, header, skiprows, nrows):
+        if nrows is not None:
+            if index <= header - 1 + skiprows:
+                return True, False
+            if index <= header - 1 + skiprows + nrows + 1:
+                return False, False
+            return False, True
+        return False, False
+
     def parse(
         self,
         sheet_name=0,
@@ -442,7 +451,15 @@ def parse(
             else:  # assume an integer if not a string
                 sheet = self.get_sheet_by_index(asheetname)
 
-            data = self.get_sheet_data(sheet, convert_float, header, skiprows, nrows)
+            gsd_header = 0 if header is None else header
+            gsd_skiprows = 0 if skiprows is None else skiprows
+            gsd_nrows = nrows
+            if isinstance(gsd_header, list) or isinstance(gsd_skiprows, list):
+                gsd_nrows = None
+
+            data = self.get_sheet_data(
+                sheet, convert_float, gsd_header, gsd_skiprows, gsd_nrows
+            )
             usecols = _maybe_convert_usecols(usecols)
 
             if not data:
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index d15266a2b41a0..4094e9da223ea 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -86,26 +86,16 @@ def get_sheet_data(
 
         table: List[List[Scalar]] = []
 
-        header = 0 if header is None else header
-        skiprows = 0 if skiprows is None else skiprows
-        if isinstance(header, list) or isinstance(skiprows, list):
-            nrows = None
-
         for i, sheet_row in enumerate(sheet_rows):
 
-            if nrows is not None:
-                if header > 1:
-                    header -= 1
-                    table.append([])
-                    continue
-                elif skiprows > 0:
-                    skiprows -= 1
-                    table.append([])
-                    continue
-                if nrows >= 0:
-                    nrows -= 1
-                else:
-                    break
+            should_continue, should_break = self.should_read_row(
+                i, header, skiprows, nrows
+            )
+            if should_continue:
+                table.append([])
+                continue
+            if should_break:
+                break
 
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 58f82cbfb1e7f..3cfc7aaa55837 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -539,26 +539,16 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        header = 0 if header is None else header
-        skiprows = 0 if skiprows is None else skiprows
-        if isinstance(header, list) or isinstance(skiprows, list):
-            nrows = None
-
-        for row in sheet.rows:
-
-            if nrows is not None:
-                if header > 1:
-                    header -= 1
-                    data.append([])
-                    continue
-                elif skiprows > 0:
-                    skiprows -= 1
-                    data.append([])
-                    continue
-                if nrows >= 0:
-                    nrows -= 1
-                else:
-                    break
+        for i, row in enumerate(sheet.rows):
+
+            should_continue, should_break = self.should_read_row(
+                i, header, skiprows, nrows
+            )
+            if should_continue:
+                data.append([])
+                continue
+            if should_break:
+                break
 
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 7915acfb98b46..5c7ae62febe3f 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -107,26 +107,16 @@ def _parse_cell(cell_contents, cell_typ):
 
         data: List[List[Scalar]] = []
 
-        header = 0 if header is None else header
-        skiprows = 0 if skiprows is None else skiprows
-        if isinstance(header, list) or isinstance(skiprows, list):
-            nrows = None
-
         for i in range(sheet.nrows):
 
-            if nrows is not None:
-                if header > 1:
-                    header -= 1
-                    data.append([])
-                    continue
-                elif skiprows > 0:
-                    skiprows -= 1
-                    data.append([])
-                    continue
-                if nrows >= 0:
-                    nrows -= 1
-                else:
-                    break
+            should_continue, should_break = self.should_read_row(
+                i, header, skiprows, nrows
+            )
+            if should_continue:
+                table.append([])
+                continue
+            if should_break:
+                break
 
             row = [
                 _parse_cell(value, typ)

From b865c8814bc848522ee8a08002f40e8b643e76e0 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 01:42:49 +0200
Subject: [PATCH 14/66] ENH: Remove import

---
 pandas/io/excel/_xlrd.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 5c7ae62febe3f..ad3eb665b1818 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -7,8 +7,6 @@
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.io.excel._base import _BaseExcelReader
-from pandas.io.parsers import _validate_integer
-
 
 class _XlrdReader(_BaseExcelReader):
     def __init__(self, filepath_or_buffer):
@@ -109,9 +107,7 @@ def _parse_cell(cell_contents, cell_typ):
 
         for i in range(sheet.nrows):
 
-            should_continue, should_break = self.should_read_row(
-                i, header, skiprows, nrows
-            )
+            should_continue, should_break = self.should_read_row(i, header, skiprows, nrows)
             if should_continue:
                 table.append([])
                 continue

From 49276daccfa7e1b21bef5a7f3bf6ae1eeb0b3276 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 01:43:59 +0200
Subject: [PATCH 15/66] ENH: Lint

---
 pandas/io/excel/_xlrd.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index ad3eb665b1818..5fd948cfe4518 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -8,6 +8,7 @@
 
 from pandas.io.excel._base import _BaseExcelReader
 
+
 class _XlrdReader(_BaseExcelReader):
     def __init__(self, filepath_or_buffer):
         """
@@ -107,7 +108,9 @@ def _parse_cell(cell_contents, cell_typ):
 
         for i in range(sheet.nrows):
 
-            should_continue, should_break = self.should_read_row(i, header, skiprows, nrows)
+            should_continue, should_break = self.should_read_row(
+                i, header, skiprows, nrows
+            )
             if should_continue:
                 table.append([])
                 continue

From 393a622f2617b9c22635a278a96d19814b0b72ab Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 02:02:52 +0200
Subject: [PATCH 16/66] ENH: Lint

---
 pandas/io/excel/_xlrd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 5fd948cfe4518..8dc065cf92034 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -112,7 +112,7 @@ def _parse_cell(cell_contents, cell_typ):
                 i, header, skiprows, nrows
             )
             if should_continue:
-                table.append([])
+                data.append([])
                 continue
             if should_break:
                 break

From e00fff18f2d1709115d9d1fa918f20070b8a772e Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 02:48:33 +0200
Subject: [PATCH 17/66] ENH: Add docstring to should_read_row

---
 pandas/io/excel/_base.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index d98b11bdb20d6..35e7d4dc7ba72 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -390,6 +390,24 @@ def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
         pass
 
     def should_read_row(self, index, header, skiprows, nrows):
+        """
+        Determines whether row should be read.
+ 
+	Parameters
+        ----------
+        index : int
+            Index of row.
+        header : int
+            Row used as column labels.
+        skiprows : int
+            Rows to skip at the begining.
+        nrows : int
+            Number of rows to parse.
+
+        Returns
+        -------
+        Tuple with the first bool element determining if row should be skipped and second bool element determining if reading should be stopped.
+        """
         if nrows is not None:
             if index <= header - 1 + skiprows:
                 return True, False

From b14642bc395fd2c8aeaa485ac54d4ae2b45473da Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 03:13:27 +0200
Subject: [PATCH 18/66] ENH: Lint

---
 pandas/io/excel/_base.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 35e7d4dc7ba72..a737410db8469 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -392,8 +392,8 @@ def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
     def should_read_row(self, index, header, skiprows, nrows):
         """
         Determines whether row should be read.
- 
-	Parameters
+
+        Parameters
         ----------
         index : int
             Index of row.
@@ -406,7 +406,8 @@ def should_read_row(self, index, header, skiprows, nrows):
 
         Returns
         -------
-        Tuple with the first bool element determining if row should be skipped and second bool element determining if reading should be stopped.
+        Tuple with the first bool element determining if row should be 
+        skipped and second bool element determining if reading should be stopped.
         """
         if nrows is not None:
             if index <= header - 1 + skiprows:

From dfc794a6defbfc6d9a47a7d4af4e59f87a83b58a Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 03:34:36 +0200
Subject: [PATCH 19/66] ENH: Lint

---
 pandas/io/excel/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index a737410db8469..e6a8c14c51f72 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -406,7 +406,7 @@ def should_read_row(self, index, header, skiprows, nrows):
 
         Returns
         -------
-        Tuple with the first bool element determining if row should be 
+        Tuple with the first bool element determining if row should be
         skipped and second bool element determining if reading should be stopped.
         """
         if nrows is not None:

From 7b501de9affffe93649d365204fed00ce8915cc7 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 19:36:05 +0200
Subject: [PATCH 20/66] ENH: Move nrows value check

---
 pandas/io/excel/_base.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index e6a8c14c51f72..5fa822265f57a 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -307,9 +307,6 @@ def read_excel(
             "an ExcelFile - ExcelFile already has the engine set"
         )
 
-    if nrows is not None:
-        _validate_integer("nrows", nrows)
-
     return io.parse(
         sheet_name=sheet_name,
         header=header,
@@ -443,6 +440,8 @@ def parse(
     ):
 
         validate_header_arg(header)
+        if nrows is not None:
+            _validate_integer("nrows", nrows)
 
         ret_dict = False
 

From 3292f6ba80a3851640c9faf24c499b8f170e08a6 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 17 Apr 2020 20:15:16 +0200
Subject: [PATCH 21/66] ENH: Remove nrows validation

---
 pandas/io/excel/_base.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 5fa822265f57a..10ca5ed43040d 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -28,7 +28,7 @@
     _pop_header_name,
     get_writer,
 )
-from pandas.io.parsers import TextParser, _validate_integer
+from pandas.io.parsers import TextParser
 
 _read_excel_doc = (
     """
@@ -440,8 +440,6 @@ def parse(
     ):
 
         validate_header_arg(header)
-        if nrows is not None:
-            _validate_integer("nrows", nrows)
 
         ret_dict = False
 
@@ -471,7 +469,7 @@ def parse(
 
             gsd_header = 0 if header is None else header
             gsd_skiprows = 0 if skiprows is None else skiprows
-            gsd_nrows = nrows
+            gsd_nrows = nrows if isinstance(nrows, int) else None
             if isinstance(gsd_header, list) or isinstance(gsd_skiprows, list):
                 gsd_nrows = None
 

From bdd5780608c12bdffe7ad2260f7844a05eba490a Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 24 Apr 2020 14:08:37 +0200
Subject: [PATCH 22/66] Run tests

---
 pandas/io/excel/_base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 10ca5ed43040d..45ec69b6b687a 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -470,6 +470,7 @@ def parse(
             gsd_header = 0 if header is None else header
             gsd_skiprows = 0 if skiprows is None else skiprows
             gsd_nrows = nrows if isinstance(nrows, int) else None
+
             if isinstance(gsd_header, list) or isinstance(gsd_skiprows, list):
                 gsd_nrows = None
 

From 1867088b14ecb0a7920e82ec67dcec0bf828b214 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 24 Apr 2020 21:47:18 +0200
Subject: [PATCH 23/66] ENH: Fix reading rows in openpyxl

---
 pandas/io/excel/_openpyxl.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 3cfc7aaa55837..e79f4e3712b59 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -539,17 +539,14 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        for i, row in enumerate(sheet.rows):
-
-            should_continue, should_break = self.should_read_row(
+        max_row = None if nrows is None else header + skiprows + nrows + 1
+        for i, row in enumerate(sheet.iter_rows(max_row=max_row)):     
+            should_continue, _ = self.should_read_row(
                 i, header, skiprows, nrows
             )
             if should_continue:
                 data.append([])
-                continue
-            if should_break:
-                break
-
+                continue            
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From 3c1eb10e1f6044d8502bd85548986ddb254184d6 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 24 Apr 2020 21:48:13 +0200
Subject: [PATCH 24/66] ENH: Fix lint

---
 pandas/io/excel/_openpyxl.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index e79f4e3712b59..fbe1ac529ab9c 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -540,13 +540,11 @@ def get_sheet_data(
         data: List[List[Scalar]] = []
 
         max_row = None if nrows is None else header + skiprows + nrows + 1
-        for i, row in enumerate(sheet.iter_rows(max_row=max_row)):     
-            should_continue, _ = self.should_read_row(
-                i, header, skiprows, nrows
-            )
+        for i, row in enumerate(sheet.iter_rows(max_row=max_row)):
+            should_continue, _ = self.should_read_row(i, header, skiprows, nrows)
             if should_continue:
                 data.append([])
-                continue            
+                continue
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From 88c3117d284105d73ad77ead46ad64d67cf11790 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 24 Apr 2020 22:17:27 +0200
Subject: [PATCH 25/66] Fix max_row variable definition

---
 pandas/io/excel/_openpyxl.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index fbe1ac529ab9c..e4715ac658a7f 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -539,7 +539,15 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        max_row = None if nrows is None else header + skiprows + nrows + 1
+        if (
+            isinstance(header, int)
+            and isinstance(skiprows, int)
+            and isinstance(nrows, int)
+        ):
+            max_row = header + skiprows + nrows + 1
+        else:
+            max_row = None
+
         for i, row in enumerate(sheet.iter_rows(max_row=max_row)):
             should_continue, _ = self.should_read_row(i, header, skiprows, nrows)
             if should_continue:

From dc600552b8f6ff5df0cd959f69bd45b15ab69d2b Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 24 Apr 2020 22:38:01 +0200
Subject: [PATCH 26/66] Fix max_row variable definition

---
 pandas/io/excel/_openpyxl.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index e4715ac658a7f..a91075957e036 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -539,14 +539,15 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        if (
-            isinstance(header, int)
-            and isinstance(skiprows, int)
-            and isinstance(nrows, int)
-        ):
-            max_row = header + skiprows + nrows + 1
-        else:
-            max_row = None
+        max_row = (
+            header + skiprows + nrows + 1
+            if (
+                isinstance(header, int)
+                and isinstance(skiprows, int)
+                and isinstance(nrows, int)
+            )
+            else None
+        )
 
         for i, row in enumerate(sheet.iter_rows(max_row=max_row)):
             should_continue, _ = self.should_read_row(i, header, skiprows, nrows)

From 6fdedfd8aa20558926072b4e4eb4c199a144932c Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 25 Apr 2020 04:02:29 +0200
Subject: [PATCH 27/66] Add types to should_read_row function

---
 pandas/io/excel/_base.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 45ec69b6b687a..885627d1dbc52 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -3,10 +3,12 @@
 from io import BytesIO
 import os
 from textwrap import fill
+from typing import List, Optional, Sequence, Tuple
 
 from pandas._config import config
 
 from pandas._libs.parsers import STR_NA_VALUES
+from pandas._typing import Union
 from pandas.errors import EmptyDataError
 from pandas.util._decorators import Appender
 
@@ -386,7 +388,13 @@ def get_sheet_by_index(self, index):
     def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
         pass
 
-    def should_read_row(self, index, header, skiprows, nrows):
+    def should_read_row(
+        self,
+        index,
+        header: Optional[Union[int, Sequence[int]]],
+        skiprows: Optional[Union[int, Sequence[int]]],
+        nrows: Optional[int],
+    ) -> Tuple[bool, bool]:
         """
         Determines whether row should be read.
 

From ba7175cb380d30c24ec387b56eab77c6582b0b1c Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 25 Apr 2020 12:06:44 +0200
Subject: [PATCH 28/66] Add types and tests

---
 pandas/io/excel/_base.py              |  4 ++--
 pandas/tests/io/excel/test_readers.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 885627d1dbc52..2d718eb18532d 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -415,9 +415,9 @@ def should_read_row(
         skipped and second bool element determining if reading should be stopped.
         """
         if nrows is not None:
-            if index <= header - 1 + skiprows:
+            if index < header + skiprows - 1:
                 return True, False
-            if index <= header - 1 + skiprows + nrows + 1:
+            if index <= header + skiprows + nrows:
                 return False, False
             return False, True
         return False, False
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 99447c03e89af..116a440902443 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -1084,3 +1084,17 @@ def test_excel_high_surrogate(self, engine):
         # should not produce a segmentation violation
         actual = pd.read_excel("high_surrogate.xlsx")
         tm.assert_frame_equal(expected, actual)
+
+    def test_header_skiprows_nrows(self, engine, read_ext):
+        data = pd.read_excel("test1" + read_ext, engine=engine)
+        expected = (
+            DataFrame(data.iloc[3:6])
+            .reset_index(drop=True)
+            .rename(columns=data.iloc[2].rename(None))
+        )
+        actual = pd.read_excel(
+            "test1" + read_ext, engine=engine, header=1, skiprows=2, nrows=3
+        )
+        tm.assert_frame_equal(expected, actual)
+        actual = pd.read_excel("test1" + read_ext, engine=engine, skiprows=3, nrows=3)
+        tm.assert_frame_equal(expected, actual)

From d884803f8259c9cc058e00a33ba041c4d3dd9167 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 25 Apr 2020 12:14:16 +0200
Subject: [PATCH 29/66] Add whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 pandas/io/excel/_xlrd.py       | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 5c39377899a20..ade05436fa76a 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -357,6 +357,7 @@ Performance improvements
   :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
   :issue:`32825`,  :issue:`32826`, :issue:`32856`, :issue:`32858`).
 - Performance improvement in reductions (sum, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`).
+- Performance improvement in `read_excel` for integer header, skiprows and nrows (:issue:`33281`)
 
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 8dc065cf92034..d8c31b6eb9a9d 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -51,7 +51,7 @@ def get_sheet_by_index(self, index):
     def get_sheet_data(
         self,
         sheet,
-        convert_float,
+        convert_float: bool,
         header: Optional[Union[int, Sequence[int]]],
         skiprows: Optional[Union[int, Sequence[int]]],
         nrows: Optional[int],

From 4be3d275d9b08eefd0582152632301660232b526 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 4 May 2020 20:46:29 +0200
Subject: [PATCH 30/66] Fix import

---
 pandas/io/excel/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 2d718eb18532d..532697ab8f17f 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -3,7 +3,7 @@
 from io import BytesIO
 import os
 from textwrap import fill
-from typing import List, Optional, Sequence, Tuple
+from typing import Optional, Sequence, Tuple
 
 from pandas._config import config
 

From 95e3e029b6854b7ffe6c15b4c1db3d07bca08980 Mon Sep 17 00:00:00 2001
From: mproszewska <38814059+mproszewska@users.noreply.github.com>
Date: Mon, 4 May 2020 20:50:15 +0200
Subject: [PATCH 31/66] Update doc/source/whatsnew/v1.1.0.rst

Co-authored-by: gfyoung <gfyoung17+GitHub@gmail.com>
---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 19a6b988b0d00..82600f4503151 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -427,7 +427,7 @@ Performance improvements
   :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
   :issue:`32825`,  :issue:`32826`, :issue:`32856`, :issue:`32858`).
 - Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`).
-- Performance improvement in `read_excel` for integer header, skiprows and nrows (:issue:`33281`).
+- Performance improvement in `read_excel` for integer ``header``, ``skiprows``, and ``nrows`` (:issue:`33281`).
 
 
 .. ---------------------------------------------------------------------------

From 38520e26bc657d14dc2d125ddcf31bfe14158872 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 4 May 2020 20:51:11 +0200
Subject: [PATCH 32/66] Add index type

---
 pandas/io/excel/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 532697ab8f17f..f57157aa8fb12 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -390,7 +390,7 @@ def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
 
     def should_read_row(
         self,
-        index,
+        index: int,
         header: Optional[Union[int, Sequence[int]]],
         skiprows: Optional[Union[int, Sequence[int]]],
         nrows: Optional[int],

From c03b46a96821af7cdd2282d0dfda0f79f476d9ec Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 4 May 2020 20:55:37 +0200
Subject: [PATCH 33/66] Parametrize test

---
 pandas/tests/io/excel/test_readers.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 116a440902443..63d6850a46f45 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -1085,7 +1085,8 @@ def test_excel_high_surrogate(self, engine):
         actual = pd.read_excel("high_surrogate.xlsx")
         tm.assert_frame_equal(expected, actual)
 
-    def test_header_skiprows_nrows(self, engine, read_ext):
+    @pytest.mark.parametrize("header, skiprows", [(1, 2), (0, 3)]) 
+    def test_header_skiprows_nrows(self, engine, read_ext, header, skiprows):
         data = pd.read_excel("test1" + read_ext, engine=engine)
         expected = (
             DataFrame(data.iloc[3:6])
@@ -1093,8 +1094,6 @@ def test_header_skiprows_nrows(self, engine, read_ext):
             .rename(columns=data.iloc[2].rename(None))
         )
         actual = pd.read_excel(
-            "test1" + read_ext, engine=engine, header=1, skiprows=2, nrows=3
+            "test1" + read_ext, engine=engine, header=header, skiprows=skiprows, nrows=3
         )
         tm.assert_frame_equal(expected, actual)
-        actual = pd.read_excel("test1" + read_ext, engine=engine, skiprows=3, nrows=3)
-        tm.assert_frame_equal(expected, actual)

From aa8cfe9a90af53f7fb7c1a366b732e37c3cb70a5 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Tue, 5 May 2020 03:05:42 +0200
Subject: [PATCH 34/66] Fix lint

---
 pandas/tests/io/excel/test_readers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 63d6850a46f45..9b62a643429d7 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -1085,7 +1085,7 @@ def test_excel_high_surrogate(self, engine):
         actual = pd.read_excel("high_surrogate.xlsx")
         tm.assert_frame_equal(expected, actual)
 
-    @pytest.mark.parametrize("header, skiprows", [(1, 2), (0, 3)]) 
+    @pytest.mark.parametrize("header, skiprows", [(1, 2), (0, 3)])
     def test_header_skiprows_nrows(self, engine, read_ext, header, skiprows):
         data = pd.read_excel("test1" + read_ext, engine=engine)
         expected = (

From c9a622d5f51b3b74cb8840aef2bb4d0f151dfcf1 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Tue, 5 May 2020 12:02:56 +0200
Subject: [PATCH 35/66] Add decorator to test

---
 pandas/tests/io/excel/test_readers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 9b62a643429d7..846818705c5dc 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -1086,6 +1086,7 @@ def test_excel_high_surrogate(self, engine):
         tm.assert_frame_equal(expected, actual)
 
     @pytest.mark.parametrize("header, skiprows", [(1, 2), (0, 3)])
+    @td.check_file_leaks
     def test_header_skiprows_nrows(self, engine, read_ext, header, skiprows):
         data = pd.read_excel("test1" + read_ext, engine=engine)
         expected = (

From aa927839d65c83a34a4cc89fc20b0b3cee2973c1 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 8 May 2020 14:00:01 +0200
Subject: [PATCH 36/66] Fix types definition

---
 pandas/io/excel/_base.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index f57157aa8fb12..9920cf439c21d 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -388,7 +388,7 @@ def get_sheet_by_index(self, index):
     def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
         pass
 
-    def should_read_row(
+    def should_skip_row(
         self,
         index: int,
         header: Optional[Union[int, Sequence[int]]],
@@ -402,9 +402,9 @@ def should_read_row(
         ----------
         index : int
             Index of row.
-        header : int
-            Row used as column labels.
-        skiprows : int
+        header : int, list of int
+            Rows used as column labels.
+        skiprows : int, list of int
             Rows to skip at the begining.
         nrows : int
             Number of rows to parse.
@@ -414,7 +414,7 @@ def should_read_row(
         Tuple with the first bool element determining if row should be
         skipped and second bool element determining if reading should be stopped.
         """
-        if nrows is not None:
+        if nrows is not None and isinstance(header, int) and isinstance(skiprows, int):
             if index < header + skiprows - 1:
                 return True, False
             if index <= header + skiprows + nrows:

From ee16c159766545f22f302ae88cebea3aa5aef624 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 8 May 2020 14:41:36 +0200
Subject: [PATCH 37/66] Change function name

---
 pandas/io/excel/_odfreader.py | 2 +-
 pandas/io/excel/_openpyxl.py  | 2 +-
 pandas/io/excel/_xlrd.py      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 4094e9da223ea..2e5f05dfda3d4 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -88,7 +88,7 @@ def get_sheet_data(
 
         for i, sheet_row in enumerate(sheet_rows):
 
-            should_continue, should_break = self.should_read_row(
+            should_continue, should_break = self.should_skip_row(
                 i, header, skiprows, nrows
             )
             if should_continue:
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index a91075957e036..5d69807f58e71 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -550,7 +550,7 @@ def get_sheet_data(
         )
 
         for i, row in enumerate(sheet.iter_rows(max_row=max_row)):
-            should_continue, _ = self.should_read_row(i, header, skiprows, nrows)
+            should_continue, _ = self.should_skip_row(i, header, skiprows, nrows)
             if should_continue:
                 data.append([])
                 continue
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index d8c31b6eb9a9d..c620c960c9443 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -108,7 +108,7 @@ def _parse_cell(cell_contents, cell_typ):
 
         for i in range(sheet.nrows):
 
-            should_continue, should_break = self.should_read_row(
+            should_continue, should_break = self.should_skip_row(
                 i, header, skiprows, nrows
             )
             if should_continue:

From 547787ae4cc3721ed4058c2fbc41e743c3b667f6 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 8 May 2020 15:18:10 +0200
Subject: [PATCH 38/66] Leaks in test fix attempt

---
 pandas/io/excel/_odfreader.py | 2 +-
 pandas/io/excel/_xlrd.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 2e5f05dfda3d4..96fb7d50ee233 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -95,7 +95,7 @@ def get_sheet_data(
                 table.append([])
                 continue
             if should_break:
-                break
+                continue
 
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index c620c960c9443..57aaec4bd803e 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -115,7 +115,7 @@ def _parse_cell(cell_contents, cell_typ):
                 data.append([])
                 continue
             if should_break:
-                break
+                continue
 
             row = [
                 _parse_cell(value, typ)

From 368b77c696c72cc6b5989bae42672692b1076661 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 8 May 2020 16:42:36 +0200
Subject: [PATCH 39/66] Reverse changes

---
 pandas/io/excel/_odfreader.py | 2 +-
 pandas/io/excel/_xlrd.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 96fb7d50ee233..2e5f05dfda3d4 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -95,7 +95,7 @@ def get_sheet_data(
                 table.append([])
                 continue
             if should_break:
-                continue
+                break
 
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 57aaec4bd803e..c620c960c9443 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -115,7 +115,7 @@ def _parse_cell(cell_contents, cell_typ):
                 data.append([])
                 continue
             if should_break:
-                continue
+                break
 
             row = [
                 _parse_cell(value, typ)

From 4d69922a6f91632e5045d36784d78240787b5539 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 8 May 2020 17:12:05 +0200
Subject: [PATCH 40/66] Change skipping rows in openpyxl

---
 pandas/io/excel/_openpyxl.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 5d69807f58e71..8c23a4290e6d2 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -550,10 +550,6 @@ def get_sheet_data(
         )
 
         for i, row in enumerate(sheet.iter_rows(max_row=max_row)):
-            should_continue, _ = self.should_skip_row(i, header, skiprows, nrows)
-            if should_continue:
-                data.append([])
-                continue
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From b4ae85f456e3018b929141dd336fefc4c668499a Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 9 May 2020 23:35:15 +0200
Subject: [PATCH 41/66] Run tests again

---
 pandas/io/excel/_openpyxl.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 8c23a4290e6d2..d77b3db0abf39 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -550,6 +550,7 @@ def get_sheet_data(
         )
 
         for i, row in enumerate(sheet.iter_rows(max_row=max_row)):
+
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From 81f86742e053414894df9fb6fc9a9455298e0329 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 10 May 2020 00:08:34 +0200
Subject: [PATCH 42/66] Remove all changes in openpyxl because of leaks

---
 pandas/io/excel/_openpyxl.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index d77b3db0abf39..4cd9e07e1be12 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -539,17 +539,7 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        max_row = (
-            header + skiprows + nrows + 1
-            if (
-                isinstance(header, int)
-                and isinstance(skiprows, int)
-                and isinstance(nrows, int)
-            )
-            else None
-        )
-
-        for i, row in enumerate(sheet.iter_rows(max_row=max_row)):
+        for row in sheet.rows:
 
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 

From 7938fcfe7d3921fa68d3850fdd101951c56507b6 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 10 May 2020 01:18:37 +0200
Subject: [PATCH 43/66] Run tests

---
 pandas/io/excel/_openpyxl.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 4cd9e07e1be12..b23ed0d494fe3 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -539,8 +539,7 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        for row in sheet.rows:
-
+        for row in (sheet.rows:
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From 7f31a308e6cd35a7de39e5cf6bd2599810a7dd09 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 10 May 2020 01:36:46 +0200
Subject: [PATCH 44/66] Fix

---
 pandas/io/excel/_openpyxl.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index b23ed0d494fe3..4cd9e07e1be12 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -539,7 +539,8 @@ def get_sheet_data(
     ) -> List[List[Scalar]]:
         data: List[List[Scalar]] = []
 
-        for row in (sheet.rows:
+        for row in sheet.rows:
+
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From ae8b84f70903becbdfd5e873db2f1e84efeae446 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 10 May 2020 16:22:59 +0200
Subject: [PATCH 45/66] Add types

---
 pandas/io/excel/_base.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 9920cf439c21d..a0a4175818b3b 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -3,12 +3,12 @@
 from io import BytesIO
 import os
 from textwrap import fill
-from typing import Optional, Sequence, Tuple
+from typing import List, Optional, Sequence, Tuple
 
 from pandas._config import config
 
 from pandas._libs.parsers import STR_NA_VALUES
-from pandas._typing import Union
+from pandas._typing import Scalar, Union
 from pandas.errors import EmptyDataError
 from pandas.util._decorators import Appender
 
@@ -385,7 +385,14 @@ def get_sheet_by_index(self, index):
         pass
 
     @abc.abstractmethod
-    def get_sheet_data(self, sheet, convert_float, header, skiprows, nrows):
+    def get_sheet_data(
+        self,
+        sheet,
+        convert_float: bool,
+        header: Optional[Union[int, Sequence[int]]],
+        skiprows: Optional[Union[int, Sequence[int]]],
+        nrows: Optional[int],
+    ) -> List[List[Scalar]]:
         pass
 
     def should_skip_row(

From a4f50097caa0b210315425bf31a15ee1a184c38e Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 10 May 2020 16:56:35 +0200
Subject: [PATCH 46/66] Run tests

---
 pandas/io/excel/_openpyxl.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 4cd9e07e1be12..7e1c9f16278c2 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -540,7 +540,6 @@ def get_sheet_data(
         data: List[List[Scalar]] = []
 
         for row in sheet.rows:
-
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From d7a28921ee3245b746946570f663575b4e19f54c Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 10 May 2020 18:43:44 +0200
Subject: [PATCH 47/66] Run tests again because of conda error

---
 pandas/io/excel/_openpyxl.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 7e1c9f16278c2..4cd9e07e1be12 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -540,6 +540,7 @@ def get_sheet_data(
         data: List[List[Scalar]] = []
 
         for row in sheet.rows:
+
             data.append([self._convert_cell(cell, convert_float) for cell in row])
 
         return data

From c94b45e1edd4494eee2a8885c25e041f6100eba6 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 15 May 2020 17:38:04 +0200
Subject: [PATCH 48/66] PERF: Remove unnecessary copies in sorting functions

---
 pandas/core/sorting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 25312b180dba1..da9cbe1023599 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -385,7 +385,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
     from pandas.core.indexes.api import Index
 
     if not key:
-        return values.copy()
+        return values
 
     if isinstance(values, ABCMultiIndex):
         return ensure_key_mapped_multiindex(values, key, level=levels)

From 0ab450b9ea5f38582d09acbcd8f697ac62f37919 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 16 May 2020 19:06:23 +0200
Subject: [PATCH 49/66] Run tests

---
 pandas/core/sorting.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index da9cbe1023599..2943714a5d015 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -386,7 +386,6 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
 
     if not key:
         return values
-
     if isinstance(values, ABCMultiIndex):
         return ensure_key_mapped_multiindex(values, key, level=levels)
 
@@ -404,7 +403,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
         else:
             type_of_values = type(values)
             result = type_of_values(result)  # try to revert to original type otherwise
-    except TypeError:
+    except TypeError:opy()
         raise TypeError(
             f"User-provided `key` function returned an invalid type {type(result)} \
             which could not be converted to {type(values)}."

From 54c7304d585c60dd148e3e47aa28514100289eb5 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 16 May 2020 19:07:12 +0200
Subject: [PATCH 50/66] Run tests

---
 pandas/core/sorting.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 2943714a5d015..da9cbe1023599 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -386,6 +386,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
 
     if not key:
         return values
+
     if isinstance(values, ABCMultiIndex):
         return ensure_key_mapped_multiindex(values, key, level=levels)
 
@@ -403,7 +404,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
         else:
             type_of_values = type(values)
             result = type_of_values(result)  # try to revert to original type otherwise
-    except TypeError:opy()
+    except TypeError:
         raise TypeError(
             f"User-provided `key` function returned an invalid type {type(result)} \
             which could not be converted to {type(values)}."

From 192090595464ce62174100ccb5960c86a66c1fff Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 22 May 2020 20:35:37 +0200
Subject: [PATCH 51/66] Resolve conflicts

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index ddd7d028be337..482af4f3e7c82 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -614,8 +614,8 @@ Performance improvements
   and :meth:`~pandas.core.groupby.groupby.Groupby.last` (:issue:`34178`)
 - Performance improvement in :func:`factorize` for nullable (integer and boolean) dtypes (:issue:`33064`).
 - Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`).
-- Performance improvement in `read_excel` for integer ``header``, ``skiprows``, and ``nrows`` (:issue:`33281`).
 - Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`)
+- Performance improvement in `read_excel` for integer ``header``, ``skiprows``, and ``nrows`` (:issue:`33281`).
 
 .. ---------------------------------------------------------------------------
 

From 6d72a346770fc93778a83e171daceec52b60e6d4 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 22 May 2020 23:19:58 +0200
Subject: [PATCH 52/66] Add asv

---
 asv_bench/benchmarks/algorithms.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 65e52e03c43c7..a96d9bc924308 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -174,4 +174,13 @@ def time_argsort(self, N):
         self.array.argsort()
 
 
+class SortIndexSeries:
+    def setup(self):
+        N = 10 ** 5
+        idx = pd.date_range(start="1/1/2000", periods=N, freq="s")
+        self.s = pd.Series(np.random.randn(N), index=idx)
+
+    def time_sort_index(self):
+        self.s.sort_index()
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip

From 5ba54a6039d3981a4187b38e11b479e53f8dcdd1 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 22 May 2020 23:20:53 +0200
Subject: [PATCH 53/66] Run black

---
 asv_bench/benchmarks/algorithms.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index a96d9bc924308..7afa97f9aa394 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -183,4 +183,5 @@ def setup(self):
     def time_sort_index(self):
         self.s.sort_index()
 
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip

From 276627019d8000792473742c0a9036cf59b5f3cb Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sat, 23 May 2020 00:28:24 +0200
Subject: [PATCH 54/66] Remove asv

---
 asv_bench/benchmarks/algorithms.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 7afa97f9aa394..65e52e03c43c7 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -174,14 +174,4 @@ def time_argsort(self, N):
         self.array.argsort()
 
 
-class SortIndexSeries:
-    def setup(self):
-        N = 10 ** 5
-        idx = pd.date_range(start="1/1/2000", periods=N, freq="s")
-        self.s = pd.Series(np.random.randn(N), index=idx)
-
-    def time_sort_index(self):
-        self.s.sort_index()
-
-
 from .pandas_vb_common import setup  # noqa: F401 isort:skip

From ac823f5298c816cc2c4fef4393f7d5cd7676ddd7 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Sun, 31 May 2020 15:20:54 +0200
Subject: [PATCH 55/66] Resolve conflict

---
 doc/source/whatsnew/v1.1.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 482af4f3e7c82..e307f71ccafe5 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -615,6 +615,8 @@ Performance improvements
 - Performance improvement in :func:`factorize` for nullable (integer and boolean) dtypes (:issue:`33064`).
 - Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`).
 - Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`)
+- Performance improvement in :class:`pandas.core.groupby.RollingGroupby` (:issue:`34052`)
+- Performance improvement in arithmetic operations (sub, add, mul, div) for MultiIndex (:issue:`34297`)
 - Performance improvement in `read_excel` for integer ``header``, ``skiprows``, and ``nrows`` (:issue:`33281`).
 
 .. ---------------------------------------------------------------------------

From 6f188fe9e9d89b60e4670f9423dedd5006320bf9 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 1 Jun 2020 01:40:46 +0200
Subject: [PATCH 56/66] Revert change

---
 pandas/core/sorting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index da9cbe1023599..25312b180dba1 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -385,7 +385,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
     from pandas.core.indexes.api import Index
 
     if not key:
-        return values
+        return values.copy()
 
     if isinstance(values, ABCMultiIndex):
         return ensure_key_mapped_multiindex(values, key, level=levels)

From ba314fe73fee507dd68e2d2ced9ffba79b653197 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 1 Jun 2020 02:41:12 +0200
Subject: [PATCH 57/66] Change should_skip_row function

---
 pandas/io/excel/_base.py      | 10 +++-------
 pandas/io/excel/_odfreader.py | 10 ++++------
 pandas/io/excel/_xlrd.py      | 13 ++++++-------
 3 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index a0a4175818b3b..a6a1afdc6e90a 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -418,16 +418,12 @@ def should_skip_row(
 
         Returns
         -------
-        Tuple with the first bool element determining if row should be
-        skipped and second bool element determining if reading should be stopped.
+        Bool determining if row should be skipped.
         """
         if nrows is not None and isinstance(header, int) and isinstance(skiprows, int):
             if index < header + skiprows - 1:
-                return True, False
-            if index <= header + skiprows + nrows:
-                return False, False
-            return False, True
-        return False, False
+                return True
+        return False
 
     def parse(
         self,
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index 2e5f05dfda3d4..e30562ca23c3c 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -86,16 +86,14 @@ def get_sheet_data(
 
         table: List[List[Scalar]] = []
 
+        if nrows is not None and isinstance(header, int) and isinstance(skiprows, int):
+            sheet_rows = sheet_rows[0 : header + skiprows + nrows + 1]
+
         for i, sheet_row in enumerate(sheet_rows):
 
-            should_continue, should_break = self.should_skip_row(
-                i, header, skiprows, nrows
-            )
-            if should_continue:
+            if self.should_skip_row(i, header, skiprows, nrows):
                 table.append([])
                 continue
-            if should_break:
-                break
 
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index c620c960c9443..61040c4158240 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -106,16 +106,15 @@ def _parse_cell(cell_contents, cell_typ):
 
         data: List[List[Scalar]] = []
 
-        for i in range(sheet.nrows):
+        sheet_nrows = sheet.nrows
+        if nrows is not None and isinstance(header, int) and isinstance(skiprows, int):
+            sheet_nrows = min(header + skiprows + nrows + 1, sheet_nrows)
 
-            should_continue, should_break = self.should_skip_row(
-                i, header, skiprows, nrows
-            )
-            if should_continue:
+        for i in range(sheet_nrows):
+
+            if self.should_skip_row(i, header, skiprows, nrows):
                 data.append([])
                 continue
-            if should_break:
-                break
 
             row = [
                 _parse_cell(value, typ)

From f923bfd33b0f9ed7b129a3d3ee99e499dbdfe76a Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 1 Jun 2020 03:12:36 +0200
Subject: [PATCH 58/66] Fix return type

---
 pandas/io/excel/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index a6a1afdc6e90a..71f5bb1e0b0f3 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -401,7 +401,7 @@ def should_skip_row(
         header: Optional[Union[int, Sequence[int]]],
         skiprows: Optional[Union[int, Sequence[int]]],
         nrows: Optional[int],
-    ) -> Tuple[bool, bool]:
+    ) -> bool:
         """
         Determines whether row should be read.
 

From 008add5969c40f02a3d7c018990b51bb1cba04ac Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 1 Jun 2020 03:43:33 +0200
Subject: [PATCH 59/66] Remove import

---
 pandas/io/excel/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
index 71f5bb1e0b0f3..987d3d83152a3 100644
--- a/pandas/io/excel/_base.py
+++ b/pandas/io/excel/_base.py
@@ -3,7 +3,7 @@
 from io import BytesIO
 import os
 from textwrap import fill
-from typing import List, Optional, Sequence, Tuple
+from typing import List, Optional, Sequence
 
 from pandas._config import config
 

From 2226050d05a3e8666a210a49fcae858b37a7d745 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Wed, 3 Jun 2020 01:30:56 +0200
Subject: [PATCH 60/66] Run tests

---
 pandas/tests/io/excel/test_readers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 261d53b66aad5..f4e6273904983 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -1151,6 +1151,7 @@ def test_excel_high_surrogate(self, engine):
     @pytest.mark.parametrize("header, skiprows", [(1, 2), (0, 3)])
     @td.check_file_leaks
     def test_header_skiprows_nrows(self, engine, read_ext, header, skiprows):
+        # GH 32727
         data = pd.read_excel("test1" + read_ext, engine=engine)
         expected = (
             DataFrame(data.iloc[3:6])

From 9216210dc82953909976391041f8a3c4b682da70 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 5 Jun 2020 02:54:50 +0200
Subject: [PATCH 61/66] Add asv

---
 asv_bench/benchmarks/io/excel.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index 80af2cff41769..84c2534a9cbf4 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -69,5 +69,9 @@ def time_read_excel(self, engine):
         fname = self.fname_odf if engine == "odf" else self.fname_excel
         read_excel(fname, engine=engine)
 
+    def nrows_read_excel(self, engine):
+        name = self.fname_odf if engine == "odf" else self.fname_excel
+        read_excel(fname, engine=engine, nrows=1)
+
 
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip

From d9aa31967e1286dc844773abb2be094a0b43a4ac Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 5 Jun 2020 03:51:05 +0200
Subject: [PATCH 62/66] Add asv

---
 asv_bench/benchmarks/io/excel.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index 80af2cff41769..926286ee5fab2 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -11,7 +11,7 @@
 
 
 def _generate_dataframe():
-    N = 2000
+    N = 20000
     C = 5
     df = DataFrame(
         np.random.randn(N, C),
@@ -69,5 +69,9 @@ def time_read_excel(self, engine):
         fname = self.fname_odf if engine == "odf" else self.fname_excel
         read_excel(fname, engine=engine)
 
+    def nrows_read_excel(self, engine):
+        name = self.fname_odf if engine == "odf" else self.fname_excel
+        read_excel(fname, engine=engine, nrows=1)
+
 
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip

From 0afb1b14c359eece44f3885d5f20b40e07a9ccb6 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Fri, 5 Jun 2020 12:31:33 +0200
Subject: [PATCH 63/66] Fix

---
 asv_bench/benchmarks/io/excel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index 926286ee5fab2..e9776ff2c641e 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -70,7 +70,7 @@ def time_read_excel(self, engine):
         read_excel(fname, engine=engine)
 
     def nrows_read_excel(self, engine):
-        name = self.fname_odf if engine == "odf" else self.fname_excel
+        fname = self.fname_odf if engine == "odf" else self.fname_excel
         read_excel(fname, engine=engine, nrows=1)
 
 

From 06003a8b090577d55680cb5271b07e0bc36d2bb5 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 8 Jun 2020 17:22:26 +0200
Subject: [PATCH 64/66] Rename asv benchmark to time_read_excel_nrows

---
 asv_bench/benchmarks/io/excel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index e9776ff2c641e..1eaccb9f2d897 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -69,7 +69,7 @@ def time_read_excel(self, engine):
         fname = self.fname_odf if engine == "odf" else self.fname_excel
         read_excel(fname, engine=engine)
 
-    def nrows_read_excel(self, engine):
+    def time_read_excel_nrows(self, engine):
         fname = self.fname_odf if engine == "odf" else self.fname_excel
         read_excel(fname, engine=engine, nrows=1)
 

From c08709b4aa213fe9ab15fc4b7eae14c53a0507e3 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 8 Jun 2020 17:27:42 +0200
Subject: [PATCH 65/66] Reduce asv excel benchmark size to N = 2000

---
 asv_bench/benchmarks/io/excel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index 1eaccb9f2d897..d4bde565ecdb2 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -11,7 +11,7 @@
 
 
 def _generate_dataframe():
-    N = 20000
+    N = 2000
     C = 5
     df = DataFrame(
         np.random.randn(N, C),

From c9a2c75ceb49ee85a62e926a1f7ee21bf15ea3a5 Mon Sep 17 00:00:00 2001
From: mproszewska <magdalena.proszewska@gmail.com>
Date: Mon, 8 Jun 2020 19:07:38 +0200
Subject: [PATCH 66/66] Restore asv excel benchmark size to N = 20000

---
 asv_bench/benchmarks/io/excel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
index d4bde565ecdb2..1eaccb9f2d897 100644
--- a/asv_bench/benchmarks/io/excel.py
+++ b/asv_bench/benchmarks/io/excel.py
@@ -11,7 +11,7 @@
 
 
 def _generate_dataframe():
-    N = 2000
+    N = 20000
     C = 5
     df = DataFrame(
         np.random.randn(N, C),