Skip to content

Commit 37577b6

Browse files
Backport PR pandas-dev#31596: BUG: read_csv used in file like object RawIOBase is not recognize encoding option (pandas-dev#31698)
Co-authored-by: paihu <[email protected]>
1 parent 1f47301 commit 37577b6

File tree

5 files changed

+13
-7
lines changed

5 files changed

+13
-7
lines changed

doc/source/whatsnew/v1.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Fixed regressions
3333
- Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`)
3434
- Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`)
3535
- Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`)
36+
- Fixed regression in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a ``RawIOBase`` file-like object (:issue:`31575`)
3637

3738
.. ---------------------------------------------------------------------------
3839

pandas/_libs/parsers.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ cdef class TextReader:
638638
raise ValueError(f'Unrecognized compression type: '
639639
f'{self.compression}')
640640

641-
if self.encoding and isinstance(source, io.BufferedIOBase):
641+
if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)):
642642
source = io.TextIOWrapper(
643643
source, self.encoding.decode('utf-8'), newline='')
644644

pandas/io/common.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import bz2
44
from collections import abc
55
import gzip
6-
from io import BufferedIOBase, BytesIO
6+
from io import BufferedIOBase, BytesIO, RawIOBase
77
import mmap
88
import os
99
import pathlib
@@ -361,9 +361,9 @@ def get_handle(
361361
try:
362362
from s3fs import S3File
363363

364-
need_text_wrapping = (BufferedIOBase, S3File)
364+
need_text_wrapping = (BufferedIOBase, RawIOBase, S3File)
365365
except ImportError:
366-
need_text_wrapping = BufferedIOBase # type: ignore
366+
need_text_wrapping = (BufferedIOBase, RawIOBase) # type: ignore
367367

368368
handles: List[IO] = list()
369369
f = path_or_buf
@@ -439,7 +439,7 @@ def get_handle(
439439
from io import TextIOWrapper
440440

441441
g = TextIOWrapper(f, encoding=encoding, newline="")
442-
if not isinstance(f, BufferedIOBase):
442+
if not isinstance(f, (BufferedIOBase, RawIOBase)):
443443
handles.append(g)
444444
f = g
445445

pandas/io/parsers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections import abc, defaultdict
66
import csv
77
import datetime
8-
from io import BufferedIOBase, StringIO, TextIOWrapper
8+
from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper
99
import re
1010
import sys
1111
from textwrap import fill
@@ -1876,7 +1876,7 @@ def __init__(self, src, **kwds):
18761876

18771877
# Handle the file object with universal line mode enabled.
18781878
# We will handle the newline character ourselves later on.
1879-
if isinstance(src, BufferedIOBase):
1879+
if isinstance(src, (BufferedIOBase, RawIOBase)):
18801880
src = TextIOWrapper(src, encoding=encoding, newline="")
18811881

18821882
kwds["encoding"] = "utf-8"

pandas/tests/io/parser/test_encoding.py

+5
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt):
142142
)
143143
def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
144144
# gh-23779: Python csv engine shouldn't error on files opened in binary.
145+
# gh-31575: Python csv engine shouldn't error on files opened in raw (unbuffered) binary mode.
145146
parser = all_parsers
146147

147148
fpath = os.path.join(csv_dir_path, fname)
@@ -155,6 +156,10 @@ def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
155156
result = parser.read_csv(fb, encoding=encoding)
156157
tm.assert_frame_equal(expected, result)
157158

159+
with open(fpath, mode="rb", buffering=0) as fb:
160+
result = parser.read_csv(fb, encoding=encoding)
161+
tm.assert_frame_equal(expected, result)
162+
158163

159164
@pytest.mark.parametrize("pass_encoding", [True, False])
160165
def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding):

0 commit comments

Comments
 (0)