Skip to content

Commit b9b61fc

Browse files
Backport PR #39253: REGR: codecs.open() is always opened in text mode (#39271)
Co-authored-by: Torsten Wörtwein <[email protected]>
1 parent c7aaa49 commit b9b61fc

File tree

3 files changed

+26
-4
lines changed

3 files changed

+26
-4
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
18+
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamReaderWriter`` in binary mode instead of in text mode (:issue:`39247`)
1819
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
1920
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
2021
- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`)

pandas/io/common.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Common IO api utilities"""
22

33
import bz2
4+
import codecs
45
from collections import abc
56
import dataclasses
67
import gzip
@@ -844,9 +845,12 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
844845

845846
def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
846847
"""Whether the handle is opened in binary mode"""
848+
# classes that expect string but have 'b' in mode
849+
text_classes = (codecs.StreamReaderWriter,)
850+
if isinstance(handle, text_classes):
851+
return False
852+
847853
# classes that expect bytes
848-
binary_classes = [BufferedIOBase, RawIOBase]
854+
binary_classes = (BufferedIOBase, RawIOBase)
849855

850-
return isinstance(handle, tuple(binary_classes)) or "b" in getattr(
851-
handle, "mode", mode
852-
)
856+
return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)

pandas/tests/io/test_common.py

+17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Tests for the pandas.io.common functionalities
33
"""
4+
import codecs
45
from io import StringIO
56
import mmap
67
import os
@@ -426,3 +427,19 @@ def test_default_errors():
426427
file = Path(path)
427428
file.write_bytes(b"\xe4\na\n1")
428429
tm.assert_frame_equal(pd.read_csv(file, skiprows=[0]), pd.DataFrame({"a": [1]}))
430+
431+
432+
@pytest.mark.parametrize("encoding", [None, "utf-8"])
433+
@pytest.mark.parametrize("format", ["csv", "json"])
434+
def test_codecs_encoding(encoding, format):
435+
# GH39247
436+
expected = tm.makeDataFrame()
437+
with tm.ensure_clean() as path:
438+
with codecs.open(path, mode="w", encoding=encoding) as handle:
439+
getattr(expected, f"to_{format}")(handle)
440+
with codecs.open(path, mode="r", encoding=encoding) as handle:
441+
if format == "csv":
442+
df = pd.read_csv(handle, index_col=0)
443+
else:
444+
df = pd.read_json(handle)
445+
tm.assert_frame_equal(expected, df)

0 commit comments

Comments
 (0)