REGR: prefer user-provided mode (#39440)

twoertwein · web-flow · commit 5dfca7aa257b · 2021-01-28T14:50:03.000-05:00
diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst
@@ -16,6 +16,7 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
 - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
+- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -857,12 +857,15 @@ def file_exists(filepath_or_buffer: FilePathOrBuffer) -> bool:
 
 def _is_binary_mode(handle: FilePathOrBuffer, mode: str) -> bool:
     """Whether the handle is opened in binary mode"""
+    # specified by user
+    if "t" in mode or "b" in mode:
+        return "b" in mode
+
     # classes that expect string but have 'b' in mode
-    text_classes = (codecs.StreamReaderWriter,)
-    if isinstance(handle, text_classes):
+    text_classes = (codecs.StreamWriter, codecs.StreamReader, codecs.StreamReaderWriter)
+    if issubclass(type(handle), text_classes):
         return False
 
     # classes that expect bytes
     binary_classes = (BufferedIOBase, RawIOBase)
-
     return isinstance(handle, binary_classes) or "b" in getattr(handle, "mode", mode)
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -2,7 +2,7 @@
 Tests for the pandas.io.common functionalities
 """
 import codecs
-from io import StringIO
+from io import BytesIO, StringIO
 import mmap
 import os
 from pathlib import Path
@@ -446,3 +446,33 @@ def test_codecs_encoding(encoding, format):
             else:
                 df = pd.read_json(handle)
     tm.assert_frame_equal(expected, df)
+
+
+def test_codecs_get_writer_reader():
+    # GH39247
+    expected = tm.makeDataFrame()
+    with tm.ensure_clean() as path:
+        with open(path, "wb") as handle:
+            with codecs.getwriter("utf-8")(handle) as encoded:
+                expected.to_csv(encoded)
+        with open(path, "rb") as handle:
+            with codecs.getreader("utf-8")(handle) as encoded:
+                df = pd.read_csv(encoded, index_col=0)
+    tm.assert_frame_equal(expected, df)
+
+
+@pytest.mark.parametrize(
+    "io_class,mode,msg",
+    [
+        (BytesIO, "t", "a bytes-like object is required, not 'str'"),
+        (StringIO, "b", "string argument expected, got 'bytes'"),
+    ],
+)
+def test_explicit_encoding(io_class, mode, msg):
+    # GH39247; this test makes sure that a user-provided mode ("*t" or "*b") takes
+    # precedence over the mode inferred from the handle. The wrong mode is requested
+    # intentionally here, so writing to the buffer is expected to raise a TypeError
+    expected = tm.makeDataFrame()
+    with io_class() as buffer:
+        with pytest.raises(TypeError, match=msg):
+            expected.to_csv(buffer, mode=f"w{mode}")

Original file line number	Diff line number	Diff line change
`@@ -16,6 +16,7 @@ Fixed regressions`
`16`	`16`	`~~~~~~~~~~~~~~~~~`
`17`	`17`	- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
`18`	`18`	- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
	`19`	+- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
`19`	`20`	`-`
`20`	`21`
`21`	`22`	`.. ---------------------------------------------------------------------------`