Skip to content

BUG: Help python csv engine read binary buffers #27925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Aug 19, 2019
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ MultiIndex
I/O
^^^

-
- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
-

Plotting
Expand Down
12 changes: 7 additions & 5 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import csv
import gzip
from http.client import HTTPException # noqa
from io import BytesIO
from io import BufferedIOBase, BytesIO
import mmap
import os
import pathlib
Expand Down Expand Up @@ -344,9 +344,9 @@ def _get_handle(
try:
from s3fs import S3File

need_text_wrapping = (BytesIO, S3File)
need_text_wrapping = (BufferedIOBase, S3File)
except ImportError:
need_text_wrapping = (BytesIO,)
need_text_wrapping = BufferedIOBase

handles = list()
f = path_or_buf
Expand Down Expand Up @@ -422,8 +422,10 @@ def _get_handle(
if is_text and (compression or isinstance(f, need_text_wrapping)):
from io import TextIOWrapper

f = TextIOWrapper(f, encoding=encoding, newline="")
handles.append(f)
g = TextIOWrapper(f, encoding=encoding, newline="")
if not isinstance(f, BufferedIOBase):
handles.append(g)
f = g

if memory_map and hasattr(f, "fileno"):
try:
Expand Down
31 changes: 28 additions & 3 deletions pandas/tests/io/parser/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2020,9 +2020,34 @@ def test_file_handles_with_open(all_parsers, csv1):
# Don't close user provided file handles.
parser = all_parsers

with open(csv1, "r") as f:
parser.read_csv(f)
assert not f.closed
for mode in ["r", "rb"]:
with open(csv1, mode) as f:
parser.read_csv(f)
assert not f.closed


@pytest.mark.parametrize(
    "fname,encoding",
    [
        ("test1.csv", "utf-8"),
        ("unicode_series.csv", "latin-1"),
        ("sauron.SHIFT_JIS.csv", "shiftjis"),
    ],
)
def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
    """
    Check that text-mode and binary-mode file handles parse identically.

    Regression test for gh-23779: the Python csv engine shouldn't error on
    files opened in binary. The frame parsed from the file path is the
    reference; a handle opened in text mode (decoded by ``open``) and a
    handle opened in binary mode (decoded by ``read_csv``) must both
    reproduce it.
    """
    parser = all_parsers
    csv_path = os.path.join(csv_dir_path, fname)

    # Reference result: let the parser open the path itself.
    expected = parser.read_csv(csv_path, encoding=encoding)

    # Each case pairs the keyword arguments for ``open`` with those for
    # ``read_csv``; the encoding is supplied to exactly one of the two.
    cases = [
        ({"mode": "r", "encoding": encoding}, {}),
        ({"mode": "rb"}, {"encoding": encoding}),
    ]
    for open_kwargs, read_kwargs in cases:
        with open(csv_path, **open_kwargs) as handle:
            result = parser.read_csv(handle, **read_kwargs)
        tm.assert_frame_equal(expected, result)


def test_invalid_file_buffer_class(all_parsers):
Expand Down