From 5e3c3e33c8ddd394c1a8411ba5cda1f7e12e68fc Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Wed, 14 Aug 2019 20:40:23 -0400
Subject: [PATCH 1/9] BUG: Help python csv engine read binary buffers

The file buffer given to read_csv could have been opened in
binary mode, but the python csv reader errors on binary buffers.

closes #23779
---
 pandas/io/common.py                               | 6 +++---
 pandas/tests/io/parser/test_python_parser_only.py | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index e01e473047b88..3497bb7862a29 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -5,7 +5,7 @@
 import csv
 import gzip
 from http.client import HTTPException  # noqa
-from io import BytesIO
+from io import BufferedIOBase, BytesIO
 import lzma
 import mmap
 import os
@@ -342,9 +342,9 @@ def _get_handle(
     try:
         from s3fs import S3File
 
-        need_text_wrapping = (BytesIO, S3File)
+        need_text_wrapping = (BufferedIOBase, BytesIO, S3File)
     except ImportError:
-        need_text_wrapping = (BytesIO,)
+        need_text_wrapping = (BufferedIOBase, BytesIO)
 
     handles = list()
     f = path_or_buf
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 5b381e43e3e19..6557cbfdc9ca9 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -296,3 +296,10 @@ def test_malformed_skipfooter(python_parser_only):
     msg = "Expected 3 fields in line 4, saw 5"
     with pytest.raises(ParserError, match=msg):
         parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
+
+
+def test_binary_buffer(python_parser_only, csv1):
+    # see gh-23779
+    parser = python_parser_only
+    with open(csv1, "rb") as f:
+        parser.read_csv(f)

From 0ac1bcebc03d77718514be161513f1f1ba6d559e Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Thu, 15 Aug 2019 00:13:49 -0400
Subject: [PATCH 2/9] whatsnew entry

---
 doc/source/whatsnew/v0.25.1.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
index 21f1fa7ddec1f..a3de14a2a40f9 100644
--- a/doc/source/whatsnew/v0.25.1.rst
+++ b/doc/source/whatsnew/v0.25.1.rst
@@ -105,7 +105,7 @@ I/O
 ^^^
 
 - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
--
+- read_csv now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
 -
 
 Plotting

From 7e270d259395dcdd350d5ae7600ed14ec1897d4c Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Thu, 15 Aug 2019 12:04:22 -0400
Subject: [PATCH 3/9] satisfy gh-14418 for binary mode files

---
 pandas/io/common.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 3497bb7862a29..862ff4e090bc6 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -346,6 +346,8 @@ def _get_handle(
     except ImportError:
         need_text_wrapping = (BufferedIOBase, BytesIO)
 
+    no_close = (BufferedIOBase)
+
     handles = list()
     f = path_or_buf
 
@@ -420,8 +422,10 @@ def _get_handle(
     if is_text and (compression or isinstance(f, need_text_wrapping)):
         from io import TextIOWrapper
 
-        f = TextIOWrapper(f, encoding=encoding, newline="")
-        handles.append(f)
+        g = TextIOWrapper(f, encoding=encoding, newline="")
+        if not isinstance(f, no_close):
+            handles.append(g)
+        f = g
 
     if memory_map and hasattr(f, "fileno"):
         try:

From b3b9d2f518c190791cbf86e9ffa79c3661e28078 Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Thu, 15 Aug 2019 12:05:02 -0400
Subject: [PATCH 4/9] update tests per requested changes

test both csv engines,
assert equality between ascii and binary modes,
colocate with other buffer tests
---
 pandas/tests/io/parser/test_common.py         | 28 +++++++++++++++++--
 .../io/parser/test_python_parser_only.py      |  7 -----
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index b94d5cd497ccf..587cca6192c16 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -2020,9 +2020,31 @@ def test_file_handles_with_open(all_parsers, csv1):
     # Don't close user provided file handles.
     parser = all_parsers
 
-    with open(csv1, "r") as f:
-        parser.read_csv(f)
-        assert not f.closed
+    for mode in ["r", "rb"]:
+        with open(csv1, mode) as f:
+            parser.read_csv(f)
+            assert not f.closed
+
+
+@pytest.mark.parametrize(
+    "fname,encoding", [
+        ("test1.csv", "utf-8"),
+        ("unicode_series.csv", "latin-1"),
+        ("sauron.SHIFT_JIS.csv", "shiftjis")
+    ]
+)
+def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
+    # gh-23779: Python csv engine shouldn't error on files opened in binary.
+    parser = all_parsers
+
+    fpath = os.path.join(csv_dir_path, fname)
+
+    with open(fpath, mode="r", encoding=encoding) as fa:
+        df_a = parser.read_csv(fa)
+    with open(fpath, mode="rb") as fb:
+        df_b = parser.read_csv(fb, encoding=encoding)
+
+    tm.assert_frame_equal(df_b, df_a)
 
 
 def test_invalid_file_buffer_class(all_parsers):
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 6557cbfdc9ca9..5b381e43e3e19 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -296,10 +296,3 @@ def test_malformed_skipfooter(python_parser_only):
     msg = "Expected 3 fields in line 4, saw 5"
     with pytest.raises(ParserError, match=msg):
         parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
-
-
-def test_binary_buffer(python_parser_only, csv1):
-    # see gh-23779
-    parser = python_parser_only
-    with open(csv1, "rb") as f:
-        parser.read_csv(f)

From 0a60af245766b10d38baed5d51b4add788ab7998 Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Thu, 15 Aug 2019 12:07:51 -0400
Subject: [PATCH 5/9] move whatsnew note to 1.0

---
 doc/source/whatsnew/v0.25.1.rst | 2 +-
 doc/source/whatsnew/v1.0.0.rst  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
index a3de14a2a40f9..21f1fa7ddec1f 100644
--- a/doc/source/whatsnew/v0.25.1.rst
+++ b/doc/source/whatsnew/v0.25.1.rst
@@ -105,7 +105,7 @@ I/O
 ^^^
 
 - Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
-- read_csv now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
+-
 -
 
 Plotting
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index aeed3668fe774..39e640dcc4935 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -157,7 +157,7 @@ MultiIndex
 I/O
 ^^^
 
--
+- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`)
 -
 
 Plotting

From 9d73aaa534d858fcdb0fa2860309fa99a80a325a Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Thu, 15 Aug 2019 12:09:35 -0400
Subject: [PATCH 6/9] black formatting

---
 pandas/io/common.py                   | 2 +-
 pandas/tests/io/parser/test_common.py | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 862ff4e090bc6..37d7cb988e46b 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -346,7 +346,7 @@ def _get_handle(
     except ImportError:
         need_text_wrapping = (BufferedIOBase, BytesIO)
 
-    no_close = (BufferedIOBase)
+    no_close = BufferedIOBase
 
     handles = list()
     f = path_or_buf
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 587cca6192c16..daae92a1e5c8d 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -2027,11 +2027,12 @@ def test_file_handles_with_open(all_parsers, csv1):
 
 
 @pytest.mark.parametrize(
-    "fname,encoding", [
+    "fname,encoding",
+    [
         ("test1.csv", "utf-8"),
         ("unicode_series.csv", "latin-1"),
-        ("sauron.SHIFT_JIS.csv", "shiftjis")
-    ]
+        ("sauron.SHIFT_JIS.csv", "shiftjis"),
+    ],
 )
 def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
     # gh-23779: Python csv engine shouldn't error on files opened in binary.

From 03bceb1336a00b600d7685fd03a3b9e0196f900c Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Thu, 15 Aug 2019 13:30:38 -0400
Subject: [PATCH 7/9] BytesIO is a subclass of BufferedIOBase

---
 pandas/io/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 37d7cb988e46b..5261148a334fc 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -342,9 +342,9 @@ def _get_handle(
     try:
         from s3fs import S3File
 
-        need_text_wrapping = (BufferedIOBase, BytesIO, S3File)
+        need_text_wrapping = (BufferedIOBase, S3File)
     except ImportError:
-        need_text_wrapping = (BufferedIOBase, BytesIO)
+        need_text_wrapping = (BufferedIOBase)
 
     no_close = BufferedIOBase
 

From ab0878d4dc9356a4b1cd1ba1e18ce2d860d1623c Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Thu, 15 Aug 2019 13:34:04 -0400
Subject: [PATCH 8/9] black formatting

---
 pandas/io/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 5261148a334fc..9fa61ca0cb6de 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -344,7 +344,7 @@ def _get_handle(
 
         need_text_wrapping = (BufferedIOBase, S3File)
     except ImportError:
-        need_text_wrapping = (BufferedIOBase)
+        need_text_wrapping = BufferedIOBase
 
     no_close = BufferedIOBase
 

From d6625325c5930d5c4888056a6feb0a64c4037cc9 Mon Sep 17 00:00:00 2001
From: Avi Kelman <patcherton.fixesthings@gmail.com>
Date: Fri, 16 Aug 2019 12:03:40 -0400
Subject: [PATCH 9/9] remove no_close and compare with non-open read

---
 pandas/io/common.py                   |  4 +---
 pandas/tests/io/parser/test_common.py | 10 ++++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 9fa61ca0cb6de..25d62effb8d44 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -346,8 +346,6 @@ def _get_handle(
     except ImportError:
         need_text_wrapping = BufferedIOBase
 
-    no_close = BufferedIOBase
-
     handles = list()
     f = path_or_buf
 
@@ -423,7 +421,7 @@ def _get_handle(
         from io import TextIOWrapper
 
         g = TextIOWrapper(f, encoding=encoding, newline="")
-        if not isinstance(f, no_close):
+        if not isinstance(f, BufferedIOBase):
             handles.append(g)
         f = g
 
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index daae92a1e5c8d..e5366a8357adb 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -2039,13 +2039,15 @@ def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding):
     parser = all_parsers
 
     fpath = os.path.join(csv_dir_path, fname)
+    expected = parser.read_csv(fpath, encoding=encoding)
 
     with open(fpath, mode="r", encoding=encoding) as fa:
-        df_a = parser.read_csv(fa)
-    with open(fpath, mode="rb") as fb:
-        df_b = parser.read_csv(fb, encoding=encoding)
+        result = parser.read_csv(fa)
+    tm.assert_frame_equal(expected, result)
 
-    tm.assert_frame_equal(df_b, df_a)
+    with open(fpath, mode="rb") as fb:
+        result = parser.read_csv(fb, encoding=encoding)
+    tm.assert_frame_equal(expected, result)
 
 
 def test_invalid_file_buffer_class(all_parsers):