Skip to content

Commit 1b895ef

Browse files
authored
DOC: read_excel skiprows documentation matches read_csv (#36435) (#36437)
* DOC: updated read_excel skiprows documentation to match read_csv (GH36435)
* TST: updated read_excel test with skiprows as int, callable (GH36435)
1 parent 87974c0 commit 1b895ef

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

pandas/io/excel/_base.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,12 @@
120120
Values to consider as True.
121121
false_values : list, default None
122122
Values to consider as False.
123-
skiprows : list-like
124-
Rows to skip at the beginning (0-indexed).
123+
skiprows : list-like, int, or callable, optional
124+
Line numbers to skip (0-indexed) or number of lines to skip (int) at the
125+
start of the file. If callable, the callable function will be evaluated
126+
against the row indices, returning True if the row should be skipped and
127+
False otherwise. An example of a valid callable argument would be ``lambda
128+
x: x in [0, 2]``.
125129
nrows : int, default None
126130
Number of rows to parse.
127131
na_values : scalar, str, list-like, or dict, default None

pandas/tests/io/excel/test_readers.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,7 @@ def test_read_excel_bool_header_arg(self, read_ext):
894894
with pytest.raises(TypeError, match=msg):
895895
pd.read_excel("test1" + read_ext, header=arg)
896896

897-
def test_read_excel_skiprows_list(self, read_ext):
897+
def test_read_excel_skiprows(self, read_ext):
898898
# GH 4903
899899
if pd.read_excel.keywords["engine"] == "pyxlsb":
900900
pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
@@ -920,6 +920,31 @@ def test_read_excel_skiprows_list(self, read_ext):
920920
)
921921
tm.assert_frame_equal(actual, expected)
922922

923+
# GH36435
924+
actual = pd.read_excel(
925+
"testskiprows" + read_ext,
926+
sheet_name="skiprows_list",
927+
skiprows=lambda x: x in [0, 2],
928+
)
929+
tm.assert_frame_equal(actual, expected)
930+
931+
actual = pd.read_excel(
932+
"testskiprows" + read_ext,
933+
sheet_name="skiprows_list",
934+
skiprows=3,
935+
names=["a", "b", "c", "d"],
936+
)
937+
expected = DataFrame(
938+
[
939+
# [1, 2.5, pd.Timestamp("2015-01-01"), True],
940+
[2, 3.5, pd.Timestamp("2015-01-02"), False],
941+
[3, 4.5, pd.Timestamp("2015-01-03"), False],
942+
[4, 5.5, pd.Timestamp("2015-01-04"), True],
943+
],
944+
columns=["a", "b", "c", "d"],
945+
)
946+
tm.assert_frame_equal(actual, expected)
947+
923948
def test_read_excel_nrows(self, read_ext):
924949
# GH 16645
925950
num_rows_to_pull = 5

0 commit comments

Comments (0)