From 012fe58d3b72a73773ce9a9a18304edbd84a1c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 30 Dec 2020 10:59:49 -0500 Subject: [PATCH] REGR: read_excel does not work for most file handles --- doc/source/whatsnew/v1.2.1.rst | 2 +- pandas/io/excel/_base.py | 9 ++------- pandas/tests/io/excel/test_readers.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 4c444ea1020dd..0fddbc696e4af 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -20,7 +20,7 @@ Fixed regressions - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`) - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`) - Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`) -- +- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 221e8b9ccfb14..5be8dbf152309 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1051,16 +1051,11 @@ def __init__( xlrd_version = LooseVersion(xlrd.__version__) - if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase, bytes)): - ext = inspect_excel_format( - content=path_or_buffer, storage_options=storage_options - ) - elif xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): + if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): ext = "xls" else: - # path_or_buffer is path-like, use stringified path ext = inspect_excel_format( - path=str(self._io), storage_options=storage_options + content=path_or_buffer, storage_options=storage_options ) if engine is None: diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index df1250cee8b00..8b1a96f694e71 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -657,6 +657,22 @@ def test_read_from_s3_url(self, read_ext, s3_resource, s3so): local_table = pd.read_excel("test1" + read_ext) tm.assert_frame_equal(url_table, local_table) + def test_read_from_s3_object(self, read_ext, s3_resource, s3so): + # GH 38788 + # Bucket "pandas-test" created in tests/io/conftest.py + with open("test1" + read_ext, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test1" + read_ext, Body=f) + + import s3fs + + s3 = s3fs.S3FileSystem(**s3so) + + with s3.open("s3://pandas-test/test1" + read_ext) as f: + url_table = pd.read_excel(f) + + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + @pytest.mark.slow def test_read_from_file_url(self, read_ext, datapath):