Skip to content

Commit c642d9b

Browse files
rmhowe425
authored and
im-vinicius
committed
DEPR: Remove bytes input for read_excel (pandas-dev#53830)
* Adding deprecation logic and updating documentation. * Adding PR number to new unit test * Updating read_excel() documentation per reviewer recommendations. * Updating read_excel() documentation per reviewer recommendations. * Updating read_excel() documentation per reviewer recommendations. * Updating implementation per reviewer recommendations.
1 parent 0bf1e55 commit c642d9b

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ Deprecations
295295
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
296296
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
297297
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
298+
- Deprecated bytes input to :func:`read_excel`. To read from a byte string, wrap it in a ``BytesIO`` object; to read a file path, use a string or path-like object. (:issue:`53767`)
298299
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
299300
- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
300301
- Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`)
@@ -305,6 +306,7 @@ Deprecations
305306
- Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
306307
- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
307308
- Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
309+
-
308310

309311
.. ---------------------------------------------------------------------------
310312
.. _whatsnew_210.performance:

pandas/io/excel/_base.py

+13
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
cast,
2222
overload,
2323
)
24+
import warnings
2425
import zipfile
2526

2627
from pandas._config import config
@@ -36,6 +37,7 @@
3637
Appender,
3738
doc,
3839
)
40+
from pandas.util._exceptions import find_stack_level
3941
from pandas.util._validators import check_dtype_backend
4042

4143
from pandas.core.dtypes.common import (
@@ -97,6 +99,10 @@
9799
By file-like object, we refer to objects with a ``read()`` method,
98100
such as a file handle (e.g. via builtin ``open`` function)
99101
or ``StringIO``.
102+
103+
.. deprecated:: 2.1.0
104+
Passing byte strings is deprecated. To read from a
105+
byte string, wrap it in a ``BytesIO`` object.
100106
sheet_name : str, int, list, or None, default 0
101107
Strings are used for sheet names. Integers are used in zero-indexed
102108
sheet positions (chart sheets do not count as a sheet position).
@@ -1504,6 +1510,13 @@ def __init__(
15041510
# First argument can also be bytes, so create a buffer
15051511
if isinstance(path_or_buffer, bytes):
15061512
path_or_buffer = BytesIO(path_or_buffer)
1513+
warnings.warn(
1514+
"Passing bytes to 'read_excel' is deprecated and "
1515+
"will be removed in a future version. To read from a "
1516+
"byte string, wrap it in a `BytesIO` object.",
1517+
FutureWarning,
1518+
stacklevel=find_stack_level(),
1519+
)
15071520

15081521
# Could be a str, ExcelFile, Book, etc.
15091522
self.io = path_or_buffer

pandas/tests/io/excel/test_readers.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
time,
44
)
55
from functools import partial
6+
from io import BytesIO
67
import os
78
from pathlib import Path
89
import platform
@@ -873,7 +874,7 @@ def test_corrupt_bytes_raises(self, engine):
873874
error = BadZipFile
874875
msg = "File is not a zip file"
875876
with pytest.raises(error, match=msg):
876-
pd.read_excel(bad_stream)
877+
pd.read_excel(BytesIO(bad_stream))
877878

878879
@pytest.mark.network
879880
@pytest.mark.single_cpu
@@ -1438,6 +1439,18 @@ def test_euro_decimal_format(self, read_ext):
14381439

14391440

14401441
class TestExcelFileRead:
1442+
def test_deprecate_bytes_input(self, engine, read_ext):
1443+
# GH 53830
1444+
msg = (
1445+
"Passing bytes to 'read_excel' is deprecated and "
1446+
"will be removed in a future version. To read from a "
1447+
"byte string, wrap it in a `BytesIO` object."
1448+
)
1449+
1450+
with tm.assert_produces_warning(FutureWarning, match=msg):
1451+
with open("test1" + read_ext, "rb") as f:
1452+
pd.read_excel(f.read(), engine=engine)
1453+
14411454
@pytest.fixture(autouse=True)
14421455
def cd_and_set_engine(self, engine, datapath, monkeypatch):
14431456
"""
@@ -1621,7 +1634,7 @@ def test_excel_read_binary(self, engine, read_ext):
16211634
with open("test1" + read_ext, "rb") as f:
16221635
data = f.read()
16231636

1624-
actual = pd.read_excel(data, engine=engine)
1637+
actual = pd.read_excel(BytesIO(data), engine=engine)
16251638
tm.assert_frame_equal(expected, actual)
16261639

16271640
def test_excel_read_binary_via_read_excel(self, read_ext, engine):

0 commit comments

Comments
 (0)