Skip to content

Commit 5dfca7a

Browse files
authored
REGR: prefer user-provided mode (#39440)
1 parent bdfaea4 commit 5dfca7a

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

doc/source/whatsnew/v1.2.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
1818
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
19+
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
1920
-
2021

2122
.. ---------------------------------------------------------------------------

pandas/io/common.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -857,12 +857,15 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
857857

858858
def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
    """
    Whether the handle is opened in binary mode.

    Parameters
    ----------
    handle : FilePathOrBuffer
        The object that will be read from or written to.
    mode : str
        The mode requested by the caller, e.g. ``"w"``, ``"wb"`` or ``"rt"``.

    Returns
    -------
    bool
        True if the handle should be treated as binary, False if it should be
        treated as text.

    Notes
    -----
    The checks are applied in this order:

    1. an explicit ``"t"`` or ``"b"`` in ``mode`` always wins;
    2. ``codecs`` stream wrappers are treated as text;
    3. ``BufferedIOBase``/``RawIOBase`` instances are treated as binary;
    4. otherwise the handle's own ``mode`` attribute (falling back to
       ``mode`` when the attribute is absent) is searched for ``"b"``.
    """
    # specified by user
    if "t" in mode or "b" in mode:
        return "b" in mode

    # classes that expect string but have 'b' in mode
    text_classes = (codecs.StreamWriter, codecs.StreamReader, codecs.StreamReaderWriter)
    if issubclass(type(handle), text_classes):
        return False

    # classes that expect bytes
    binary_classes = (BufferedIOBase, RawIOBase)
    if isinstance(handle, binary_classes):
        return True

    # A handle may expose a ``mode`` attribute that is not a string; a
    # substring test on it would raise TypeError, so treat it as "not binary".
    handle_mode = getattr(handle, "mode", mode)
    return isinstance(handle_mode, str) and "b" in handle_mode

pandas/tests/io/test_common.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Tests for the pandas.io.common functionalities
33
"""
44
import codecs
5-
from io import StringIO
5+
from io import BytesIO, StringIO
66
import mmap
77
import os
88
from pathlib import Path
@@ -446,3 +446,33 @@ def test_codecs_encoding(encoding, format):
446446
else:
447447
df = pd.read_json(handle)
448448
tm.assert_frame_equal(expected, df)
449+
450+
451+
def test_codecs_get_writer_reader():
    """Round-trip a DataFrame through ``codecs`` writer and reader wrappers."""
    # GH39247
    expected = tm.makeDataFrame()
    with tm.ensure_clean() as path:
        # The file is opened in binary mode; the utf-8 codecs wrapper around
        # it is what gets passed to to_csv / read_csv.
        with open(path, "wb") as handle:
            with codecs.getwriter("utf-8")(handle) as encoded:
                expected.to_csv(encoded)
        with open(path, "rb") as handle:
            with codecs.getreader("utf-8")(handle) as encoded:
                df = pd.read_csv(encoded, index_col=0)
    tm.assert_frame_equal(expected, df)
462+
463+
464+
@pytest.mark.parametrize(
    "io_class,mode,msg",
    [
        (BytesIO, "t", "a bytes-like object is required, not 'str'"),
        (StringIO, "b", "string argument expected, got 'bytes'"),
    ],
)
def test_explicit_encoding(io_class, mode, msg):
    """Check that a user-provided text/binary mode is honoured by ``to_csv``."""
    # GH39247; this test makes sure that if a user provides mode="*t" or "*b",
    # it is used. Here the wrong mode is requested on purpose (text for
    # BytesIO, binary for StringIO), so honouring it must raise a TypeError.
    expected = tm.makeDataFrame()
    with io_class() as buffer:
        with pytest.raises(TypeError, match=msg):
            expected.to_csv(buffer, mode=f"w{mode}")

0 commit comments

Comments
 (0)