diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 24ba9be4383eb..4e583ce4d3af9 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -15,6 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`) +- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamReaderWriter`` in binary mode instead of in text mode (:issue:`39247`) - Fixed regression in :meth:`read_csv` and other read functions were the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`) - Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`) - Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occured (:issue:`39202`) diff --git a/pandas/io/common.py b/pandas/io/common.py index 47811d47e7238..998e8b63f8336 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -2,6 +2,7 @@ from __future__ import annotations import bz2 +import codecs from collections import abc import dataclasses import gzip @@ -857,9 +858,12 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool: def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool: """Whether the handle is opened in binary mode""" + # classes that expect string but have 'b' in mode + text_classes = (codecs.StreamReaderWriter,) + if isinstance(handle, text_classes): + return False + # classes that expect bytes - binary_classes = [BufferedIOBase, RawIOBase] + binary_classes = (BufferedIOBase, RawIOBase) - return isinstance(handle, tuple(binary_classes)) or "b" in getattr( - handle, "mode", mode - ) + return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index d31bee9aca135..8edfd3f16efdf 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -1,6 +1,7 @@ """ Tests for the pandas.io.common functionalities """ +import codecs from io import StringIO import mmap import os @@ -429,3 +430,19 @@ def test_default_errors(): file = Path(path) file.write_bytes(b"\xe4\na\n1") tm.assert_frame_equal(pd.read_csv(file, skiprows=[0]), pd.DataFrame({"a": [1]})) + + +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +@pytest.mark.parametrize("format", ["csv", "json"]) +def test_codecs_encoding(encoding, format): + # GH39247 + expected = tm.makeDataFrame() + with tm.ensure_clean() as path: + with codecs.open(path, mode="w", encoding=encoding) as handle: + getattr(expected, f"to_{format}")(handle) + with codecs.open(path, mode="r", encoding=encoding) as handle: + if format == "csv": + df = pd.read_csv(handle, index_col=0) + else: + df = pd.read_json(handle) + tm.assert_frame_equal(expected, df)