Skip to content

Commit 6e0ab71

Browse files
fiendish authored and
TomAugspurger committed
BUG: Help python csv engine read binary buffers (#27925)
* BUG: Help python csv engine read binary buffers. The file buffer given to read_csv could have been opened in binary mode, but the Python csv reader errors on binary buffers. Closes #23779.
1 parent ba94f9b commit 6e0ab71

File tree

3 files changed

+36
-9
lines changed

3 files changed

+36
-9
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,7 @@ MultiIndex
158158
I/O
159159
^^^
160160

161-
-
161+
- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
162162
-
163163

164164
Plotting

pandas/io/common.py

+7-5
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import csv
66
import gzip
77
from http.client import HTTPException # noqa
8-
from io import BytesIO
8+
from io import BufferedIOBase, BytesIO
99
import mmap
1010
import os
1111
import pathlib
@@ -344,9 +344,9 @@ def _get_handle(
344344
try:
345345
from s3fs import S3File
346346

347-
need_text_wrapping = (BytesIO, S3File)
347+
need_text_wrapping = (BufferedIOBase, S3File)
348348
except ImportError:
349-
need_text_wrapping = (BytesIO,)
349+
need_text_wrapping = BufferedIOBase
350350

351351
handles = list()
352352
f = path_or_buf
@@ -422,8 +422,10 @@ def _get_handle(
422422
if is_text and (compression or isinstance(f, need_text_wrapping)):
423423
from io import TextIOWrapper
424424

425-
f = TextIOWrapper(f, encoding=encoding, newline="")
426-
handles.append(f)
425+
g = TextIOWrapper(f, encoding=encoding, newline="")
426+
if not isinstance(f, BufferedIOBase):
427+
handles.append(g)
428+
f = g
427429

428430
if memory_map and hasattr(f, "fileno"):
429431
try:

pandas/tests/io/parser/test_common.py

+28-3
Original file line number | Diff line number | Diff line change
@@ -2020,9 +2020,34 @@ def test_file_handles_with_open(all_parsers, csv1):
20202020
# Don't close user provided file handles.
20212021
parser = all_parsers
20222022

2023-
with open(csv1, "r") as f:
2024-
parser.read_csv(f)
2025-
assert not f.closed
2023+
for mode in ["r", "rb"]:
2024+
with open(csv1, mode) as f:
2025+
parser.read_csv(f)
2026+
assert not f.closed
2027+
2028+
2029+
@pytest.mark.parametrize(
2030+
"fname,encoding",
2031+
[
2032+
("test1.csv", "utf-8"),
2033+
("unicode_series.csv", "latin-1"),
2034+
("sauron.SHIFT_JIS.csv", "shiftjis"),
2035+
],
2036+
)
2037+
def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
2038+
# gh-23779: Python csv engine shouldn't error on files opened in binary.
2039+
parser = all_parsers
2040+
2041+
fpath = os.path.join(csv_dir_path, fname)
2042+
expected = parser.read_csv(fpath, encoding=encoding)
2043+
2044+
with open(fpath, mode="r", encoding=encoding) as fa:
2045+
result = parser.read_csv(fa)
2046+
tm.assert_frame_equal(expected, result)
2047+
2048+
with open(fpath, mode="rb") as fb:
2049+
result = parser.read_csv(fb, encoding=encoding)
2050+
tm.assert_frame_equal(expected, result)
20262051

20272052

20282053
def test_invalid_file_buffer_class(all_parsers):

0 commit comments

Comments
 (0)