diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index 656e779055486..baa0cc2ac9e18 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`) - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) +- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index f71522b5c9555..e5a1f58ec6cd2 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -857,12 +857,15 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool: def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool: """Whether the handle is opened in binary mode""" + # specified by user + if "t" in mode or "b" in mode: + return "b" in mode + # classes that expect string but have 'b' in mode - text_classes = (codecs.StreamReaderWriter,) - if isinstance(handle, text_classes): + text_classes = (codecs.StreamWriter, codecs.StreamReader, codecs.StreamReaderWriter) + if issubclass(type(handle), text_classes): return False # classes that expect bytes binary_classes = (BufferedIOBase, RawIOBase) - return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 8edfd3f16efdf..69a1427cec34f 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -2,7 +2,7 @@ Tests for the pandas.io.common functionalities """ import codecs -from io import StringIO +from io import BytesIO, StringIO import mmap import os from pathlib import Path @@ -446,3 +446,33 @@ def test_codecs_encoding(encoding, format): else: df = pd.read_json(handle) tm.assert_frame_equal(expected, df) + + +def test_codecs_get_writer_reader(): + # GH39247 + expected = tm.makeDataFrame() + with tm.ensure_clean() as path: + with open(path, "wb") as handle: + with codecs.getwriter("utf-8")(handle) as encoded: + expected.to_csv(encoded) + with open(path, "rb") as handle: + with codecs.getreader("utf-8")(handle) as encoded: + df = pd.read_csv(encoded, index_col=0) + tm.assert_frame_equal(expected, df) + + +@pytest.mark.parametrize( + "io_class,mode,msg", + [ + (BytesIO, "t", "a bytes-like object is required, not 'str'"), + (StringIO, "b", "string argument expected, got 'bytes'"), + ], +) +def test_explicit_encoding(io_class, mode, msg): + # GH39247; this test makes sure that if a user provides mode="*t" or "*b", + # it is used. In the case of this test it leads to an error as intentionally the + # wrong mode is requested + expected = tm.makeDataFrame() + with io_class() as buffer: + with pytest.raises(TypeError, match=msg): + expected.to_csv(buffer, mode=f"w{mode}")