diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7b9efd7f593dd..b3f17c62f3dde 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,6 +295,7 @@ Deprecations - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`) - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) +- Deprecated bytes input to :func:`read_excel`. To read from a byte string, wrap it in ``io.BytesIO``; to read from a file path, use a string or path-like object. (:issue:`53767`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`) - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`) @@ -305,6 +306,7 @@ Deprecations - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) +- .. 
--------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f4782dcfcc08d..fbc3893e9b53a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -21,6 +21,7 @@ cast, overload, ) +import warnings import zipfile from pandas._config import config @@ -36,6 +37,7 @@ Appender, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.util._validators import check_dtype_backend from pandas.core.dtypes.common import ( @@ -97,6 +99,10 @@ By file-like object, we refer to objects with a ``read()`` method, such as a file handle (e.g. via builtin ``open`` function) or ``StringIO``. + + .. deprecated:: 2.1.0 + Passing byte strings is deprecated. To read from a + byte string, wrap it in a ``BytesIO`` object. sheet_name : str, int, list, or None, default 0 Strings are used for sheet names. Integers are used in zero-indexed sheet positions (chart sheets do not count as a sheet position). @@ -1504,6 +1510,13 @@ def __init__( # First argument can also be bytes, so create a buffer if isinstance(path_or_buffer, bytes): path_or_buffer = BytesIO(path_or_buffer) + warnings.warn( + "Passing bytes to 'read_excel' is deprecated and " + "will be removed in a future version. To read from a " + "byte string, wrap it in a `BytesIO` object.", + FutureWarning, + stacklevel=find_stack_level(), + ) # Could be a str, ExcelFile, Book, etc. 
self.io = path_or_buffer diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 88f55145b599a..1e17e866ec530 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -3,6 +3,7 @@ time, ) from functools import partial +from io import BytesIO import os from pathlib import Path import platform @@ -873,7 +874,7 @@ def test_corrupt_bytes_raises(self, engine): error = BadZipFile msg = "File is not a zip file" with pytest.raises(error, match=msg): - pd.read_excel(bad_stream) + pd.read_excel(BytesIO(bad_stream)) @pytest.mark.network @tm.network( @@ -1446,6 +1447,18 @@ def test_euro_decimal_format(self, read_ext): class TestExcelFileRead: + def test_deprecate_bytes_input(self, engine, read_ext): + # GH 53830 + msg = ( + "Passing bytes to 'read_excel' is deprecated and " + "will be removed in a future version. To read from a " + "byte string, wrap it in a `BytesIO` object." + ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + with open("test1" + read_ext, "rb") as f: + pd.read_excel(f.read(), engine=engine) + @pytest.fixture(autouse=True) def cd_and_set_engine(self, engine, datapath, monkeypatch): """ @@ -1629,7 +1642,7 @@ def test_excel_read_binary(self, engine, read_ext): with open("test1" + read_ext, "rb") as f: data = f.read() - actual = pd.read_excel(data, engine=engine) + actual = pd.read_excel(BytesIO(data), engine=engine) tm.assert_frame_equal(expected, actual) def test_excel_read_binary_via_read_excel(self, read_ext, engine):