Skip to content

Commit a61ab39

Browse files
Backport PR #39440: REGR: prefer user-provided mode (#39452)
Co-authored-by: Torsten Wörtwein <[email protected]>
1 parent 62a46fe commit a61ab39

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

doc/source/whatsnew/v1.2.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
1818
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
19+
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
1920
-
2021

2122
.. ---------------------------------------------------------------------------

pandas/io/common.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -845,12 +845,15 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
845845

846846
def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
847847
"""Whether the handle is opened in binary mode"""
848+
# specified by user
849+
if "t" in mode or "b" in mode:
850+
return "b" in mode
851+
848852
# classes that expect string but have 'b' in mode
849-
text_classes = (codecs.StreamReaderWriter,)
850-
if isinstance(handle, text_classes):
853+
text_classes = (codecs.StreamWriter, codecs.StreamReader, codecs.StreamReaderWriter)
854+
if issubclass(type(handle), text_classes):
851855
return False
852856

853857
# classes that expect bytes
854858
binary_classes = (BufferedIOBase, RawIOBase)
855-
856859
return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)

pandas/tests/io/test_common.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Tests for the pandas.io.common functionalities
33
"""
44
import codecs
5-
from io import StringIO
5+
from io import BytesIO, StringIO
66
import mmap
77
import os
88
from pathlib import Path
@@ -443,3 +443,33 @@ def test_codecs_encoding(encoding, format):
443443
else:
444444
df = pd.read_json(handle)
445445
tm.assert_frame_equal(expected, df)
446+
447+
448+
def test_codecs_get_writer_reader():
    """Round-trip a frame through codecs writer/reader wrappers (GH39247).

    The frame is written with ``to_csv`` to a ``codecs.getwriter("utf-8")``
    wrapper around a file opened in ``"wb"`` mode, read back with
    ``read_csv`` from a ``codecs.getreader("utf-8")`` wrapper around the same
    file opened in ``"rb"`` mode, and compared with the original.
    """
    # GH39247
    expected = tm.makeDataFrame()
    with tm.ensure_clean() as path:
        # the underlying file is binary; the codecs wrapper accepts str
        with open(path, "wb") as handle:
            with codecs.getwriter("utf-8")(handle) as encoded:
                expected.to_csv(encoded)
        with open(path, "rb") as handle:
            with codecs.getreader("utf-8")(handle) as encoded:
                df = pd.read_csv(encoded, index_col=0)
    tm.assert_frame_equal(expected, df)
459+
460+
461+
@pytest.mark.parametrize(
    "io_class,mode,msg",
    [
        (BytesIO, "t", "a bytes-like object is required, not 'str'"),
        (StringIO, "b", "string argument expected, got 'bytes'"),
    ],
)
def test_explicit_encoding(io_class, mode, msg):
    """Check that a user-provided ``"t"``/``"b"`` mode is honoured (GH39247).

    Each case deliberately requests the wrong mode for the buffer type
    (text mode for ``BytesIO``, binary mode for ``StringIO``), so ``to_csv``
    is expected to raise ``TypeError`` with the given message.
    """
    # GH39247; this test makes sure that if a user provides mode="*t" or "*b",
    # it is used. In the case of this test it leads to an error as intentionally the
    # wrong mode is requested
    expected = tm.makeDataFrame()
    with io_class() as buffer:
        with pytest.raises(TypeError, match=msg):
            expected.to_csv(buffer, mode=f"w{mode}")

0 commit comments

Comments
 (0)