From a171b99a204aea7a7c925ba3322b168c23d10842 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 24 Jun 2023 11:11:12 -0400 Subject: [PATCH 1/6] Adding deprecation logic and updating documentation. --- doc/source/whatsnew/v2.1.0.rst | 2 ++ pandas/io/excel/_base.py | 13 ++++++++++++- pandas/tests/io/excel/test_readers.py | 17 +++++++++++++++-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7b9efd7f593dd..b3f17c62f3dde 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,6 +295,7 @@ Deprecations - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`) - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) +- Deprecated bytes input to :func:`read_excel`. To read from a byte string, wrap it in ``io.BytesIO``; to read a file path, use a string or path-like object. (:issue:`53767`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`) - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. 
(:issue:`53409`) @@ -305,6 +306,7 @@ Deprecations - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f4782dcfcc08d..7172053d3f909 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -21,6 +21,7 @@ cast, overload, ) +import warnings import zipfile from pandas._config import config @@ -36,6 +37,7 @@ Appender, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.util._validators import check_dtype_backend from pandas.core.dtypes.common import ( @@ -97,6 +99,9 @@ By file-like object, we refer to objects with a ``read()`` method, such as a file handle (e.g. via builtin ``open`` function) or ``StringIO``. + + .. deprecated:: 2.1.0 + Passing byte strings is deprecated. sheet_name : str, int, list, or None, default 0 Strings are used for sheet names. Integers are used in zero-indexed sheet positions (chart sheets do not count as a sheet position). @@ -1503,7 +1508,13 @@ def __init__( # First argument can also be bytes, so create a buffer if isinstance(path_or_buffer, bytes): - path_or_buffer = BytesIO(path_or_buffer) + warnings.warn( + "Passing bytes to 'read_excel' is deprecated and " + "will be removed in a future version. 
To read from a " + "byte string, wrap it in a `BytesIO` object.", + FutureWarning, + stacklevel=find_stack_level(), + ) # Could be a str, ExcelFile, Book, etc. self.io = path_or_buffer diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 88f55145b599a..e493374d8d153 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -3,6 +3,7 @@ time, ) from functools import partial +from io import BytesIO import os from pathlib import Path import platform @@ -873,7 +874,7 @@ def test_corrupt_bytes_raises(self, engine): error = BadZipFile msg = "File is not a zip file" with pytest.raises(error, match=msg): - pd.read_excel(bad_stream) + pd.read_excel(BytesIO(bad_stream)) @pytest.mark.network @tm.network( @@ -1446,6 +1447,18 @@ def test_euro_decimal_format(self, read_ext): class TestExcelFileRead: + def test_deprecate_bytes_input(self, engine, read_ext): + # GH + msg = ( + "Passing bytes to 'read_excel' is deprecated and " + "will be removed in a future version. To read from a " + "byte string, wrap it in a `BytesIO` object." 
+ ) + + with tm.assert_produces_warning(FutureWarning, match=msg): + with open("test1" + read_ext, "rb") as f: + pd.read_excel(f.read(), engine=engine) + @pytest.fixture(autouse=True) def cd_and_set_engine(self, engine, datapath, monkeypatch): """ @@ -1629,7 +1642,7 @@ def test_excel_read_binary(self, engine, read_ext): with open("test1" + read_ext, "rb") as f: data = f.read() - actual = pd.read_excel(data, engine=engine) + actual = pd.read_excel(BytesIO(data), engine=engine) tm.assert_frame_equal(expected, actual) def test_excel_read_binary_via_read_excel(self, read_ext, engine): From 17353ea6cf2e56025dda8465db3f0576507bb26b Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 24 Jun 2023 11:15:54 -0400 Subject: [PATCH 2/6] Adding PR number to new unit test --- pandas/tests/io/excel/test_readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index e493374d8d153..1e17e866ec530 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1448,7 +1448,7 @@ def test_euro_decimal_format(self, read_ext): class TestExcelFileRead: def test_deprecate_bytes_input(self, engine, read_ext): - # GH + # GH 53830 msg = ( "Passing bytes to 'read_excel' is deprecated and " "will be removed in a future version. To read from a " From 5c5bdec3336be0ab6d9055d88c62225c473283a0 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sun, 25 Jun 2023 16:26:05 -0400 Subject: [PATCH 3/6] Updating read_excel() documentation per reviewer recommendations. --- pandas/io/excel/_base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 7172053d3f909..0d7edd83b340d 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -101,7 +101,8 @@ or ``StringIO``. .. deprecated:: 2.1.0 - Passing byte strings is deprecated. + Passing byte strings is deprecated. 
To read from a " + "byte string, wrap it in a `BytesIO` object. sheet_name : str, int, list, or None, default 0 Strings are used for sheet names. Integers are used in zero-indexed sheet positions (chart sheets do not count as a sheet position). From 17ff094d851a0b8407557bab80ebc83c90d866a4 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sun, 25 Jun 2023 17:49:53 -0400 Subject: [PATCH 4/6] Updating read_excel() documentation per reviewer recommendations. --- pandas/io/excel/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 0d7edd83b340d..2ff8393f4ba23 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -101,8 +101,8 @@ or ``StringIO``. .. deprecated:: 2.1.0 - Passing byte strings is deprecated. To read from a " - "byte string, wrap it in a `BytesIO` object. + Passing byte strings is deprecated. To read from a + byte string, wrap it in a `BytesIO` object. sheet_name : str, int, list, or None, default 0 Strings are used for sheet names. Integers are used in zero-indexed sheet positions (chart sheets do not count as a sheet position). From a9ef371fea3a41e9a23ea903c305c9bd57e23535 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 26 Jun 2023 18:17:01 -0400 Subject: [PATCH 5/6] Updating read_excel() documentation per reviewer recommendations. --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2ff8393f4ba23..493598534b5cf 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -102,7 +102,7 @@ .. deprecated:: 2.1.0 Passing byte strings is deprecated. To read from a - byte string, wrap it in a `BytesIO` object. + byte string, wrap it in a ``BytesIO`` object. sheet_name : str, int, list, or None, default 0 Strings are used for sheet names. Integers are used in zero-indexed sheet positions (chart sheets do not count as a sheet position). 
From c92a3d620a232389b06045959730b12a27d44ee0 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 26 Jun 2023 22:01:33 -0400 Subject: [PATCH 6/6] Updating implementation per reviewer recommendations. --- pandas/io/excel/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 493598534b5cf..fbc3893e9b53a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1509,6 +1509,7 @@ def __init__( # First argument can also be bytes, so create a buffer if isinstance(path_or_buffer, bytes): + path_or_buffer = BytesIO(path_or_buffer) warnings.warn( "Passing bytes to 'read_excel' is deprecated and " "will be removed in a future version. To read from a "