Skip to content

Commit 0e035ce

Browse files
Backport PR #32577: REG: Restore read_csv function for some file-likes (#32609)
Co-authored-by: gfyoung <[email protected]>
1 parent daae221 commit 0e035ce

File tree

4 files changed

+27
-3
lines changed

4 files changed

+27
-3
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ Fixed regressions
2525
- Fixed regression in :meth:`pandas.core.groupby.GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`)
2626
- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`)
2727
- Fixed bug in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`)
28+
- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`)
2829
-
2930

3031
.. ---------------------------------------------------------------------------

pandas/_libs/parsers.pyx

+2 -1
Original file line number | Diff line number | Diff line change
@@ -638,7 +638,8 @@ cdef class TextReader:
638638
raise ValueError(f'Unrecognized compression type: '
639639
f'{self.compression}')
640640

641-
if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)):
641+
if (self.encoding and hasattr(source, "read") and
642+
not hasattr(source, "encoding")):
642643
source = io.TextIOWrapper(
643644
source, self.encoding.decode('utf-8'), newline='')
644645

pandas/io/parsers.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from collections import abc, defaultdict
66
import csv
77
import datetime
8-
from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper
8+
from io import StringIO, TextIOWrapper
99
import re
1010
import sys
1111
from textwrap import fill
@@ -1876,7 +1876,7 @@ def __init__(self, src, **kwds):
18761876

18771877
# Handle the file object with universal line mode enabled.
18781878
# We will handle the newline character ourselves later on.
1879-
if isinstance(src, (BufferedIOBase, RawIOBase)):
1879+
if hasattr(src, "read") and not hasattr(src, "encoding"):
18801880
src = TextIOWrapper(src, encoding=encoding, newline="")
18811881

18821882
kwds["encoding"] = "utf-8"

pandas/tests/io/parser/test_encoding.py

+22
Original file line number | Diff line number | Diff line change
@@ -175,3 +175,25 @@ def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding)
175175

176176
result = parser.read_csv(f, encoding=encoding if pass_encoding else None)
177177
tm.assert_frame_equal(result, expected)
178+
179+
180+
def test_encoding_named_temp_file(all_parsers):
181+
# see gh-31819
182+
parser = all_parsers
183+
encoding = "shift-jis"
184+
185+
if parser.engine == "python":
186+
pytest.skip("NamedTemporaryFile does not work with Python engine")
187+
188+
title = "てすと"
189+
data = "こむ"
190+
191+
expected = DataFrame({title: [data]})
192+
193+
with tempfile.NamedTemporaryFile() as f:
194+
f.write(f"{title}\n{data}".encode(encoding))
195+
196+
f.seek(0)
197+
198+
result = parser.read_csv(f, encoding=encoding)
199+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)