# Patch summary (free-form preamble; everything before the first "diff --git"
# header is commentary, not patch content).
#
# Purpose: handle GH 12292, described in the changelog entry below as
# ``read_excel`` failing to read one empty column.
#
# Files touched:
#   1. doc/source/whatsnew/v0.18.0.txt
#        Adds one "Bug Fixes" entry referencing :issue:`12292`.
#   2. pandas/io/excel.py
#        Wraps the TextParser(...) construction, parser.read(), and the
#        follow-up columns.set_names(header_names) step in a try block.
#        On StopIteration the sheet's entry in `output` is set to an empty
#        DataFrame() instead of letting the exception propagate.
#   3. pandas/io/tests/test_excel.py
#        Adds test_read_one_empty_col_no_header and
#        test_read_one_empty_col_with_header. Each writes a 4x3 frame whose
#        first column holds empty strings, then reads it back with
#        parse_cols=[0] using header=None and header=0.
#          - written with header=False: both reads are expected to equal
#            DataFrame().
#          - written with header=True: header=None is expected to equal
#            DataFrame(pd.Series([0], dtype='int64')); header=0 is expected
#            to equal DataFrame(columns=[0], dtype='int64').
#
# NOTE(review): the hunk headers describe multi-line hunks (e.g. 21 -> 26
# lines for excel.py), but the patch text below sits on two physical lines;
# newlines and leading indentation appear to have been collapsed. The
# original line structure has to be restored before this can be applied.
# NOTE(review): the try body also covers the column-renaming step, and only
# StopIteration is caught. Presumably StopIteration comes from the parser
# when the selected column has no data; confirm before narrowing the block.
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index fa959f651d135..61046a9f2626b 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -1088,6 +1088,7 @@ Bug Fixes - Bug in ``Series.plot`` failing when index has a ``CustomBusinessDay`` frequency (:issue:`7222`). - Bug in ``read_excel`` failing to read data with one column when ``squeeze=True`` (:issue:`12157`) +- Bug in ``read_excel`` failing to read one empty column (:issue:`12292`) - Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`) - Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`) - Bug in ``.read_csv`` where strings like ``'2E'`` are treated as valid floats (:issue:`12237`) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 06e99aa8ebce9..5656c360b3990 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -448,21 +448,26 @@ def _parse_cell(cell_contents, cell_typ): if com.is_list_like(header) and len(header) > 1: has_index_names = True - parser = TextParser(data, header=header, index_col=index_col, - has_index_names=has_index_names, - na_values=na_values, - thousands=thousands, - parse_dates=parse_dates, - date_parser=date_parser, - skiprows=skiprows, - skip_footer=skip_footer, - squeeze=squeeze, - **kwds) - - output[asheetname] = parser.read() - if not squeeze or isinstance(output[asheetname], DataFrame): - output[asheetname].columns = output[ - asheetname].columns.set_names(header_names) + # GH 12292 : error when read one empty column from excel file + try: + parser = TextParser(data, header=header, index_col=index_col, + has_index_names=has_index_names, + na_values=na_values, + thousands=thousands, + parse_dates=parse_dates, + date_parser=date_parser, + skiprows=skiprows, + skip_footer=skip_footer, + squeeze=squeeze, + **kwds) + + output[asheetname] = parser.read() + if not squeeze or 
isinstance(output[asheetname], DataFrame): + output[asheetname].columns = output[ + asheetname].columns.set_names(header_names) + except StopIteration: + # No Data, return an empty DataFrame + output[asheetname] = DataFrame() if ret_dict: return output diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 389d41327d75c..b8d61047a7b6d 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -401,6 +401,60 @@ def test_read_excel_blank_with_header(self): actual = self.get_exceldf('blank_with_header', 'Sheet1') tm.assert_frame_equal(actual, expected) + # GH 12292 : error when read one empty column from excel file + def test_read_one_empty_col_no_header(self): + df = pd.DataFrame( + [["", 1, 100], + ["", 2, 200], + ["", 3, 300], + ["", 4, 400]] + ) + with ensure_clean(self.ext) as path: + df.to_excel(path, 'no_header', index=False, header=False) + actual_header_none = read_excel( + path, + 'no_header', + parse_cols=[0], + header=None + ) + + actual_header_zero = read_excel( + path, + 'no_header', + parse_cols=[0], + header=0 + ) + expected = DataFrame() + tm.assert_frame_equal(actual_header_none, expected) + tm.assert_frame_equal(actual_header_zero, expected) + + def test_read_one_empty_col_with_header(self): + df = pd.DataFrame( + [["", 1, 100], + ["", 2, 200], + ["", 3, 300], + ["", 4, 400]] + ) + with ensure_clean(self.ext) as path: + df.to_excel(path, 'with_header', index=False, header=True) + actual_header_none = read_excel( + path, + 'with_header', + parse_cols=[0], + header=None + ) + + actual_header_zero = read_excel( + path, + 'with_header', + parse_cols=[0], + header=0 + ) + expected_header_none = DataFrame(pd.Series([0], dtype='int64')) + tm.assert_frame_equal(actual_header_none, expected_header_none) + expected_header_zero = DataFrame(columns=[0], dtype='int64') + tm.assert_frame_equal(actual_header_zero, expected_header_zero) + class XlrdTests(ReadingTestsBase): """