Pass more to_csv tests with engine="pyarrow"

lithomas1 · lithomas1 · commit 8328120b215b · 2023-07-22T17:17:28.000-07:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3729,7 +3729,7 @@ def to_csv(
         header: bool_t | list[str] = True,
         index: bool_t = True,
         index_label: IndexLabel | None = None,
-        mode: str = "w",
+        mode: str | None = None,
         encoding: str | None = None,
         compression: CompressionOptions = "infer",
         quoting: int | None = None,
@@ -3786,14 +3786,16 @@ def to_csv(
             sequence should be given if the object uses MultiIndex. If
             False do not print fields for index names. Use index_label=False
             for easier importing in R.
-        mode : {{'w', 'x', 'a'}}, default 'w'
+        mode : {{'w', 'x', 'a'}}, default 'w' (python engine) or 'wb' (pyarrow engine)
             Forwarded to either `open(mode=)` or `fsspec.open(mode=)` to control
             the file opening. Typical values include:
 
             - 'w', truncate the file first.
             - 'x', exclusive creation, failing if the file already exists.
             - 'a', append to the end of file if it exists.
 
+            NOTE: The pyarrow engine can only handle binary buffers.
+
         encoding : str, optional
             A string representing the encoding to use in the output file,
             defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
@@ -3903,6 +3905,11 @@ def to_csv(
             decimal=decimal,
         )
 
+        if mode is None:
+            mode = "w"
+            if engine == "pyarrow":
+                mode += "b"
+
         return DataFrameRenderer(formatter).to_csv(
             path_or_buf,
             engine=engine,
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -20,6 +20,7 @@
 import numpy as np
 
 from pandas._libs import writers as libwriters
+from pandas.compat import pa_version_under11p0
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import cache_readonly
 
@@ -253,6 +254,8 @@ def save(self) -> None:
             errors=self.errors,
             compression=self.compression,
             storage_options=self.storage_options,
+            # pyarrow engine exclusively writes bytes
+            is_text=self.engine == "python",
         ) as handles:
             # Note: self.encoding is irrelevant here
             self._save(handles.handle)
@@ -262,13 +265,17 @@ def _save_pyarrow(self, handle) -> None:
         pa_csv = import_optional_dependency("pyarrow.csv")
         # Convert index to column and rename name to empty string
         # since we serialize the index as basically a column with no name
-        # TODO: this won't work for multi-indexes
-        obj = self.obj.reset_index(names=[""])
+        # TODO: this won't work for multi-indexes (without names)
+        obj = self.obj
+        if self.index:
+            new_names = [
+                label if label is not None else "" for label in self.obj.index.names
+            ]
+            obj = self.obj.reset_index(names=new_names)
 
         table = pa.Table.from_pandas(obj)
 
         # Map quoting arg to pyarrow equivalents
-        pa_quoting = None
         if self.quoting == csvlib.QUOTE_MINIMAL:
             pa_quoting = "needed"
         elif self.quoting == csvlib.QUOTE_ALL:
@@ -278,18 +285,21 @@ def _save_pyarrow(self, handle) -> None:
         elif self.quoting == csvlib.QUOTE_NONE:
             pa_quoting = "none"
         else:
-            raise ValueError(
+            raise NotImplementedError(
                 f"Quoting option {self.quoting} is not supported with engine='pyarrow'"
             )
 
-        write_options = pa_csv.WriteOptions(
-            include_header=self._need_to_save_header,
-            batch_size=self.chunksize,
-            delimiter=self.sep,
-            quoting_style=pa_quoting,
-        )
-        # pa_csv.write_csv(table, handle, write_options)
-        pa_csv.write_csv(table, self.filepath_or_buffer, write_options)
+        kwargs = {
+            "include_header": self._need_to_save_header,
+            "batch_size": self.chunksize,
+            "delimiter": self.sep,
+        }
+
+        if not pa_version_under11p0:
+            kwargs["quoting_style"] = pa_quoting
+
+        write_options = pa_csv.WriteOptions(**kwargs)
+        pa_csv.write_csv(table, handle, write_options)
 
     def _save(self, handle) -> None:
         if self.engine == "pyarrow":
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -18,7 +18,10 @@
 )
 from decimal import Decimal
 from functools import partial
-from io import StringIO
+from io import (
+    BytesIO,
+    StringIO,
+)
 import math
 import re
 from shutil import get_terminal_size
@@ -1127,7 +1130,7 @@ def to_csv(
 
         if path_or_buf is None:
             created_buffer = True
-            path_or_buf = StringIO()
+            path_or_buf = StringIO() if engine == "python" else BytesIO()
         else:
             created_buffer = False
 
@@ -1154,8 +1157,11 @@ def to_csv(
         csv_formatter.save()
 
         if created_buffer:
-            assert isinstance(path_or_buf, StringIO)
             content = path_or_buf.getvalue()
+            if isinstance(path_or_buf, BytesIO):
+                # Need to decode into string since the
+                # pyarrow engine only writes binary data
+                content = content.decode("utf-8")
             path_or_buf.close()
             return content
 
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -546,7 +546,8 @@ def test_to_csv_write_to_open_file(self, engine):
 z
 """
         with tm.ensure_clean("test.txt") as path:
-            with open(path, "w", encoding="utf-8") as f:
+            # TODO: open in bytes mode for pyarrow
+            with open(path, encoding="utf-8") as f:
                 f.write("manual header\n")
                 df.to_csv(f, header=None, index=None, engine=engine)
             with open(path, encoding="utf-8") as f:
@@ -559,6 +560,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self, engine):
         expected_rows = ["x", "y", "z"]
         expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
         with tm.ensure_clean("test.txt") as path:
+            # TODO: Open in bytes mode for pyarrow
             with open(path, "w", newline="", encoding="utf-8") as f:
                 f.write("manual header\n")
                 df.to_csv(f, header=None, index=None, engine=engine)