diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 9841df0507138..8db47000480ed 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -25,6 +25,7 @@ Fixed regressions - Fixed regression in :meth:`pandas.core.groupby.GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) - Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - Fixed bug in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) +- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2fd227694800c..3a42a64046abd 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -638,7 +638,8 @@ cdef class TextReader: raise ValueError(f'Unrecognized compression type: ' f'{self.compression}') - if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)): + if (self.encoding and hasattr(source, "read") and + not hasattr(source, "encoding")): source = io.TextIOWrapper( source, self.encoding.decode('utf-8'), newline='') diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bc2fb9f0f41bc..50b5db0274aa5 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -5,7 +5,7 @@ from collections import abc, defaultdict import csv import datetime -from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper +from io import StringIO, TextIOWrapper import re import sys from textwrap import fill @@ -1870,7 +1870,7 @@ def __init__(self, src, **kwds): # Handle the file object with universal line mode enabled. # We will handle the newline character ourselves later on. - if isinstance(src, (BufferedIOBase, RawIOBase)): + if hasattr(src, "read") and not hasattr(src, "encoding"): src = TextIOWrapper(src, encoding=encoding, newline="") kwds["encoding"] = "utf-8" diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 3661e4e056db2..13b74cf29f857 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -5,6 +5,7 @@ from io import BytesIO import os +import tempfile import numpy as np import pytest @@ -174,3 +175,25 @@ def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding) result = parser.read_csv(f, encoding=encoding if pass_encoding else None) tm.assert_frame_equal(result, expected) + + +def test_encoding_named_temp_file(all_parsers): + # see gh-31819 + parser = all_parsers + encoding = "shift-jis" + + if parser.engine == "python": + pytest.skip("NamedTemporaryFile does not work with Python engine") + + title = "てすと" + data = "こむ" + + expected = DataFrame({title: [data]}) + + with tempfile.NamedTemporaryFile() as f: + f.write(f"{title}\n{data}".encode(encoding)) + + f.seek(0) + + result = parser.read_csv(f, encoding=encoding) + tm.assert_frame_equal(result, expected)