Skip to content

Commit edbd450

Browse files
authored
REGR: codecs.open() is always opened in text mode (#39253)
1 parent 77af016 commit edbd450

File tree

3 files changed

+26
-4
lines changed

3 files changed

+26
-4
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
18+
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamReaderWriter`` in binary mode instead of in text mode (:issue:`39247`)
1819
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
1920
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
2021
- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`)

pandas/io/common.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import annotations
33

44
import bz2
5+
import codecs
56
from collections import abc
67
import dataclasses
78
import gzip
@@ -857,9 +858,12 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
857858

858859
def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
859860
"""Whether the handle is opened in binary mode"""
861+
# classes that expect string but have 'b' in mode
862+
text_classes = (codecs.StreamReaderWriter,)
863+
if isinstance(handle, text_classes):
864+
return False
865+
860866
# classes that expect bytes
861-
binary_classes = [BufferedIOBase, RawIOBase]
867+
binary_classes = (BufferedIOBase, RawIOBase)
862868

863-
return isinstance(handle, tuple(binary_classes)) or "b" in getattr(
864-
handle, "mode", mode
865-
)
869+
return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)

pandas/tests/io/test_common.py

+17
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Tests for the pandas.io.common functionalities
33
"""
4+
import codecs
45
from io import StringIO
56
import mmap
67
import os
@@ -429,3 +430,19 @@ def test_default_errors():
429430
file = Path(path)
430431
file.write_bytes(b"\xe4\na\n1")
431432
tm.assert_frame_equal(pd.read_csv(file, skiprows=[0]), pd.DataFrame({"a": [1]}))
433+
434+
435+
@pytest.mark.parametrize("encoding", [None, "utf-8"])
@pytest.mark.parametrize("format", ["csv", "json"])
def test_codecs_encoding(encoding, format):
    """Round-trip a frame through handles created by ``codecs.open``."""
    # GH39247
    expected = tm.makeDataFrame()
    with tm.ensure_clean() as path:
        # write through a codecs handle using the matching ``to_<format>`` method
        with codecs.open(path, mode="w", encoding=encoding) as handle:
            writer = getattr(expected, f"to_{format}")
            writer(handle)
        # read the file back through a fresh codecs handle
        with codecs.open(path, mode="r", encoding=encoding) as handle:
            df = (
                pd.read_csv(handle, index_col=0)
                if format == "csv"
                else pd.read_json(handle)
            )
    tm.assert_frame_equal(expected, df)

0 commit comments

Comments
 (0)