diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 2ca836d9c5508..222690b2aed8d 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -33,6 +33,7 @@ Fixed regressions - Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`) - Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`) - Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`) +- Fixed regression in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a ``RawIOBase`` file-like object (:issue:`31575`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 377d49f2bbd29..3077f73a8d1a4 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -638,7 +638,7 @@ cdef class TextReader: raise ValueError(f'Unrecognized compression type: ' f'{self.compression}') - if self.encoding and isinstance(source, io.BufferedIOBase): + if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)): source = io.TextIOWrapper( source, self.encoding.decode('utf-8'), newline='') diff --git a/pandas/io/common.py b/pandas/io/common.py index 771a302d647ec..9617965915aa5 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -3,7 +3,7 @@ import bz2 from collections import abc import gzip -from io import BufferedIOBase, BytesIO +from io import BufferedIOBase, BytesIO, RawIOBase import mmap import os import pathlib @@ -361,9 +361,9 @@ def get_handle( try: from s3fs import S3File - need_text_wrapping = (BufferedIOBase, S3File) + need_text_wrapping = (BufferedIOBase, RawIOBase, S3File) except ImportError: - need_text_wrapping = BufferedIOBase # type: ignore + need_text_wrapping = (BufferedIOBase, RawIOBase) # type: ignore handles: List[IO] = list() f = path_or_buf @@ -439,7 
+439,7 @@ def get_handle( from io import TextIOWrapper g = TextIOWrapper(f, encoding=encoding, newline="") - if not isinstance(f, BufferedIOBase): + if not isinstance(f, (BufferedIOBase, RawIOBase)): handles.append(g) f = g diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b4eb2fb1411d0..cb108362f4dc7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -5,7 +5,7 @@ from collections import abc, defaultdict import csv import datetime -from io import BufferedIOBase, StringIO, TextIOWrapper +from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper import re import sys from textwrap import fill @@ -1876,7 +1876,7 @@ def __init__(self, src, **kwds): # Handle the file object with universal line mode enabled. # We will handle the newline character ourselves later on. - if isinstance(src, BufferedIOBase): + if isinstance(src, (BufferedIOBase, RawIOBase)): src = TextIOWrapper(src, encoding=encoding, newline="") kwds["encoding"] = "utf-8" diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 33abf4bb7d9ee..620f837935718 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -142,6 +142,7 @@ def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt): ) def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding): # gh-23779: Python csv engine shouldn't error on files opened in binary. + # gh-31575: Python csv engine shouldn't error on files opened in raw binary. 
parser = all_parsers fpath = os.path.join(csv_dir_path, fname) @@ -155,6 +156,10 @@ def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding): result = parser.read_csv(fb, encoding=encoding) tm.assert_frame_equal(expected, result) + with open(fpath, mode="rb", buffering=0) as fb: + result = parser.read_csv(fb, encoding=encoding) + tm.assert_frame_equal(expected, result) + @pytest.mark.parametrize("pass_encoding", [True, False]) def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding):