From ea895abf249b31b45507bb37b4cdcefe565b0897 Mon Sep 17 00:00:00 2001 From: paihu Date: Mon, 3 Feb 2020 13:20:02 +0900 Subject: [PATCH 1/4] TST: add test case (#31575) --- pandas/tests/io/parser/test_encoding.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 406e7bedfd298..25ed5614c5564 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -5,6 +5,7 @@ from io import BytesIO import os +import tempfile import numpy as np import pytest @@ -141,6 +142,7 @@ def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt): ) def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding): # gh-23779: Python csv engine shouldn't error on files opened in binary. + # gh-31575: Python csv engine shouldn't error on files opened in binary with buffering=0. parser = all_parsers fpath = os.path.join(csv_dir_path, fname) @@ -154,6 +156,10 @@ def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding): result = parser.read_csv(fb, encoding=encoding) tm.assert_frame_equal(expected, result) + with open(fpath, mode="rb", buffering=0) as fb: + result = parser.read_csv(fb, encoding=encoding) + tm.assert_frame_equal(expected, result) + @pytest.mark.parametrize("pass_encoding", [True, False]) def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding): From ef8f8b768b564d08f2dea72667f85fbce2ff6729 Mon Sep 17 00:00:00 2001 From: paihu Date: Mon, 3 Feb 2020 13:53:43 +0900 Subject: [PATCH 2/4] BUG: fix read_csv with RawIOBase broken (#31575) --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/parsers.pyx | 2 +- pandas/io/common.py | 8 ++++---- pandas/io/parsers.py | 4 ++-- pandas/tests/io/parser/test_encoding.py | 3 +-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9fdda83abe944..f62ceb750765a 100644 --- 
a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -109,6 +109,7 @@ Datetimelike - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) +- Bug in :meth: `read_csv` used in file like object `RawIOBase` is not recognize `encoding` option (:issue:`31575`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 377d49f2bbd29..3077f73a8d1a4 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -638,7 +638,7 @@ cdef class TextReader: raise ValueError(f'Unrecognized compression type: ' f'{self.compression}') - if self.encoding and isinstance(source, io.BufferedIOBase): + if self.encoding and isinstance(source, (io.BufferedIOBase, io.RawIOBase)): source = io.TextIOWrapper( source, self.encoding.decode('utf-8'), newline='') diff --git a/pandas/io/common.py b/pandas/io/common.py index 00f2961e41617..e506cc155d48d 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -3,7 +3,7 @@ import bz2 from collections import abc import gzip -from io import BufferedIOBase, BytesIO +from io import BufferedIOBase, BytesIO, RawIOBase import mmap import os import pathlib @@ -359,9 +359,9 @@ def get_handle( try: from s3fs import S3File - need_text_wrapping = (BufferedIOBase, S3File) + need_text_wrapping = (BufferedIOBase, RawIOBase, S3File) except ImportError: - need_text_wrapping = BufferedIOBase # type: ignore + need_text_wrapping = (BufferedIOBase, RawIOBase) # type: ignore handles: List[IO] = list() f = 
path_or_buf @@ -437,7 +437,7 @@ def get_handle( from io import TextIOWrapper g = TextIOWrapper(f, encoding=encoding, newline="") - if not isinstance(f, BufferedIOBase): + if not isinstance(f, (BufferedIOBase, RawIOBase)): handles.append(g) f = g diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a33d81ff437bf..cf4d2e4b4656b 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -5,7 +5,7 @@ from collections import abc, defaultdict import csv import datetime -from io import BufferedIOBase, StringIO, TextIOWrapper +from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper import re import sys from textwrap import fill @@ -1868,7 +1868,7 @@ def __init__(self, src, **kwds): # Handle the file object with universal line mode enabled. # We will handle the newline character ourselves later on. - if isinstance(src, BufferedIOBase): + if isinstance(src, (BufferedIOBase, RawIOBase)): src = TextIOWrapper(src, encoding=encoding, newline="") kwds["encoding"] = "utf-8" diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 25ed5614c5564..13f72a0414bac 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -5,7 +5,6 @@ from io import BytesIO import os -import tempfile import numpy as np import pytest @@ -142,7 +141,7 @@ def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt): ) def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding): # gh-23779: Python csv engine shouldn't error on files opened in binary. - # gh-31575: Python csv engine shouldn't error on files opened in binary with buffering=0. + # gh-31575: Python csv engine shouldn't error on files opened in raw binary. 
parser = all_parsers fpath = os.path.join(csv_dir_path, fname) From 02c51b3280ecb46ea4eab4adcfdbc864c23d51d4 Mon Sep 17 00:00:00 2001 From: paihu Date: Mon, 3 Feb 2020 23:51:23 +0900 Subject: [PATCH 3/4] DOC: fix change log position and delete blank space --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index f62ceb750765a..faef27d598f96 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -109,7 +109,6 @@ Datetimelike - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) - Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`) -- Bug in :meth: `read_csv` used in file like object `RawIOBase` is not recognize `encoding` option (:issue:`31575`) Timedelta ^^^^^^^^^ @@ -179,6 +178,7 @@ I/O ^^^ - Bug in :meth:`read_json` where integer overflow was occuring when json contains big number strings. 
(:issue:`30320`) - Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`) +- Bug in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a ``RawIOBase`` file-like object (:issue:`31575`) - Plotting From 17baa553e5af1457c317bf698950e5f5c5b954e2 Mon Sep 17 00:00:00 2001 From: paihu Date: Wed, 5 Feb 2020 10:40:18 +0900 Subject: [PATCH 4/4] DOC: move doc whatsnew 1.1.0 to 1.0.1 --- doc/source/whatsnew/v1.0.1.rst | 2 +- doc/source/whatsnew/v1.1.0.rst | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 56b11cdae15ae..c1b98adf18f76 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -99,7 +99,7 @@ I/O ^^^ - Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`) -- +- Bug in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a ``RawIOBase`` file-like object (:issue:`31575`) - Plotting diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index faef27d598f96..9fdda83abe944 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -178,7 +178,6 @@ I/O ^^^ - Bug in :meth:`read_json` where integer overflow was occuring when json contains big number strings. (:issue:`30320`) - Bug in :meth:`DataFrame.to_json` was raising ``NotFoundError`` when ``path_or_buf`` was an S3 URI (:issue:`28375`) -- Bug in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a ``RawIOBase`` file-like object (:issue:`31575`) - Plotting