diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 32a2514b3b6a3..55300b6f24689 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -414,6 +414,7 @@ I/O - :meth:`read_sql` returned an empty generator if ``chunksize`` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`) - Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`) - Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`) +- Bug in :func:`read_excel` not accepting ``skip_blank_lines`` argument (:issue:`39808`) Period ^^^^^^ diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 9ad589d4583c6..ed83522e5ae1f 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -204,6 +204,9 @@ Detect missing value markers (empty strings and the value of na_values). In data without any NAs, passing na_filter=False can improve the performance of reading a large file. +skip_blank_lines : bool, default True + If True, skip over blank lines in single-column spreadsheets rather than + interpreting as NaN values. verbose : bool, default False Indicate number of NA values placed in non-numeric columns. 
parse_dates : bool, list-like, or dict, default False @@ -352,6 +355,7 @@ def read_excel( na_values=None, keep_default_na=True, na_filter=True, + skip_blank_lines=True, verbose=False, parse_dates=False, date_parser=None, @@ -390,6 +394,7 @@ def read_excel( na_values=na_values, keep_default_na=keep_default_na, na_filter=na_filter, + skip_blank_lines=skip_blank_lines, verbose=verbose, parse_dates=parse_dates, date_parser=date_parser, @@ -486,6 +491,7 @@ def parse( skiprows=None, nrows=None, na_values=None, + skip_blank_lines=True, verbose=False, parse_dates=False, date_parser=None, @@ -598,6 +604,7 @@ def parse( skiprows=skiprows, nrows=nrows, na_values=na_values, + skip_blank_lines=skip_blank_lines, parse_dates=parse_dates, date_parser=date_parser, thousands=thousands, @@ -1166,6 +1173,7 @@ def parse( skiprows=None, nrows=None, na_values=None, + skip_blank_lines=True, parse_dates=False, date_parser=None, thousands=None, @@ -1199,6 +1207,7 @@ def parse( skiprows=skiprows, nrows=nrows, na_values=na_values, + skip_blank_lines=skip_blank_lines, parse_dates=parse_dates, date_parser=date_parser, thousands=thousands, diff --git a/pandas/tests/io/data/excel/one_col_blank_line.ods b/pandas/tests/io/data/excel/one_col_blank_line.ods new file mode 100644 index 0000000000000..df5fbcfaa0357 Binary files /dev/null and b/pandas/tests/io/data/excel/one_col_blank_line.ods differ diff --git a/pandas/tests/io/data/excel/one_col_blank_line.xls b/pandas/tests/io/data/excel/one_col_blank_line.xls new file mode 100644 index 0000000000000..dcf2ebecded61 Binary files /dev/null and b/pandas/tests/io/data/excel/one_col_blank_line.xls differ diff --git a/pandas/tests/io/data/excel/one_col_blank_line.xlsb b/pandas/tests/io/data/excel/one_col_blank_line.xlsb new file mode 100644 index 0000000000000..9257d016c762a Binary files /dev/null and b/pandas/tests/io/data/excel/one_col_blank_line.xlsb differ diff --git a/pandas/tests/io/data/excel/one_col_blank_line.xlsm 
b/pandas/tests/io/data/excel/one_col_blank_line.xlsm new file mode 100644 index 0000000000000..c249901ecc10e Binary files /dev/null and b/pandas/tests/io/data/excel/one_col_blank_line.xlsm differ diff --git a/pandas/tests/io/data/excel/one_col_blank_line.xlsx b/pandas/tests/io/data/excel/one_col_blank_line.xlsx new file mode 100644 index 0000000000000..2538e406d2e77 Binary files /dev/null and b/pandas/tests/io/data/excel/one_col_blank_line.xlsx differ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 1c71666e88651..f12089702a268 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1185,6 +1185,20 @@ def test_no_header_with_list_index_col(self, read_ext): ) tm.assert_frame_equal(expected, result) + def test_one_col_skip_blank_line(self, read_ext): + file_name = "one_col_blank_line" + read_ext + data = [0.5, 1, 2] + expected = DataFrame(data, columns=["numbers"]) + result = pd.read_excel(file_name) + tm.assert_frame_equal(expected, result) + + def test_one_col_noskip_blank_line(self, read_ext): + file_name = "one_col_blank_line" + read_ext + data = [0.5, np.nan, 1, 2] + expected = DataFrame(data, columns=["numbers"]) + result = pd.read_excel(file_name, skip_blank_lines=False) + tm.assert_frame_equal(expected, result) + class TestExcelFileRead: @pytest.fixture(autouse=True)