Skip to content

Commit 8328120

Browse files
committed
pass more
1 parent 4b7f880 commit 8328120

File tree

4 files changed

+43
-18
lines changed

4 files changed

+43
-18
lines changed

pandas/core/generic.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -3729,7 +3729,7 @@ def to_csv(
37293729
header: bool_t | list[str] = True,
37303730
index: bool_t = True,
37313731
index_label: IndexLabel | None = None,
3732-
mode: str = "w",
3732+
mode: str | None = None,
37333733
encoding: str | None = None,
37343734
compression: CompressionOptions = "infer",
37353735
quoting: int | None = None,
@@ -3786,14 +3786,16 @@ def to_csv(
37863786
sequence should be given if the object uses MultiIndex. If
37873787
False do not print fields for index names. Use index_label=False
37883788
for easier importing in R.
3789-
mode : {{'w', 'x', 'a'}}, default 'w'
3789+
mode : {{'w', 'x', 'a'}}, default 'w' (Python engine) or 'wb' (Pyarrow engine)
37903790
Forwarded to either `open(mode=)` or `fsspec.open(mode=)` to control
37913791
the file opening. Typical values include:
37923792
37933793
- 'w', truncate the file first.
37943794
- 'x', exclusive creation, failing if the file already exists.
37953795
- 'a', append to the end of file if it exists.
37963796
3797+
NOTE: The pyarrow engine can only handle binary buffers.
3798+
37973799
encoding : str, optional
37983800
A string representing the encoding to use in the output file,
37993801
defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
@@ -3903,6 +3905,11 @@ def to_csv(
39033905
decimal=decimal,
39043906
)
39053907

3908+
if mode is None:
3909+
mode = "w"
3910+
if engine == "pyarrow":
3911+
mode += "b"
3912+
39063913
return DataFrameRenderer(formatter).to_csv(
39073914
path_or_buf,
39083915
engine=engine,

pandas/io/formats/csvs.py

+22-12
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import numpy as np
2121

2222
from pandas._libs import writers as libwriters
23+
from pandas.compat import pa_version_under11p0
2324
from pandas.compat._optional import import_optional_dependency
2425
from pandas.util._decorators import cache_readonly
2526

@@ -253,6 +254,8 @@ def save(self) -> None:
253254
errors=self.errors,
254255
compression=self.compression,
255256
storage_options=self.storage_options,
257+
# pyarrow engine exclusively writes bytes
258+
is_text=self.engine == "python",
256259
) as handles:
257260
# Note: self.encoding is irrelevant here
258261
self._save(handles.handle)
@@ -262,13 +265,17 @@ def _save_pyarrow(self, handle) -> None:
262265
pa_csv = import_optional_dependency("pyarrow.csv")
263266
# Convert index to column and rename name to empty string
264267
# since we serialize the index as basically a column with no name
265-
# TODO: this won't work for multi-indexes
266-
obj = self.obj.reset_index(names=[""])
268+
# TODO: this won't work for multi-indexes (without names)
269+
obj = self.obj
270+
if self.index:
271+
new_names = [
272+
label if label is not None else "" for label in self.obj.index.names
273+
]
274+
obj = self.obj.reset_index(names=new_names)
267275

268276
table = pa.Table.from_pandas(obj)
269277

270278
# Map quoting arg to pyarrow equivalents
271-
pa_quoting = None
272279
if self.quoting == csvlib.QUOTE_MINIMAL:
273280
pa_quoting = "needed"
274281
elif self.quoting == csvlib.QUOTE_ALL:
@@ -278,18 +285,21 @@ def _save_pyarrow(self, handle) -> None:
278285
elif self.quoting == csvlib.QUOTE_NONE:
279286
pa_quoting = "none"
280287
else:
281-
raise ValueError(
288+
raise NotImplementedError(
282289
f"Quoting option {self.quoting} is not supported with engine='pyarrow'"
283290
)
284291

285-
write_options = pa_csv.WriteOptions(
286-
include_header=self._need_to_save_header,
287-
batch_size=self.chunksize,
288-
delimiter=self.sep,
289-
quoting_style=pa_quoting,
290-
)
291-
# pa_csv.write_csv(table, handle, write_options)
292-
pa_csv.write_csv(table, self.filepath_or_buffer, write_options)
292+
kwargs = {
293+
"include_header": self._need_to_save_header,
294+
"batch_size": self.chunksize,
295+
"delimiter": self.sep,
296+
}
297+
298+
if not pa_version_under11p0:
299+
kwargs["quoting_style"] = pa_quoting
300+
301+
write_options = pa_csv.WriteOptions(**kwargs)
302+
pa_csv.write_csv(table, handle, write_options)
293303

294304
def _save(self, handle) -> None:
295305
if self.engine == "pyarrow":

pandas/io/formats/format.py

+9-3
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,10 @@
1818
)
1919
from decimal import Decimal
2020
from functools import partial
21-
from io import StringIO
21+
from io import (
22+
BytesIO,
23+
StringIO,
24+
)
2225
import math
2326
import re
2427
from shutil import get_terminal_size
@@ -1127,7 +1130,7 @@ def to_csv(
11271130

11281131
if path_or_buf is None:
11291132
created_buffer = True
1130-
path_or_buf = StringIO()
1133+
path_or_buf = StringIO() if engine == "python" else BytesIO()
11311134
else:
11321135
created_buffer = False
11331136

@@ -1154,8 +1157,11 @@ def to_csv(
11541157
csv_formatter.save()
11551158

11561159
if created_buffer:
1157-
assert isinstance(path_or_buf, StringIO)
11581160
content = path_or_buf.getvalue()
1161+
if isinstance(path_or_buf, BytesIO):
1162+
# Need to decode into string since the
1163+
# pyarrow engine only writes binary data
1164+
content = content.decode("utf-8")
11591165
path_or_buf.close()
11601166
return content
11611167

pandas/tests/io/formats/test_to_csv.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -546,7 +546,8 @@ def test_to_csv_write_to_open_file(self, engine):
546546
z
547547
"""
548548
with tm.ensure_clean("test.txt") as path:
549-
with open(path, "w", encoding="utf-8") as f:
549+
# TODO: open in bytes mode for pyarrow
550+
with open(path, encoding="utf-8") as f:
550551
f.write("manual header\n")
551552
df.to_csv(f, header=None, index=None, engine=engine)
552553
with open(path, encoding="utf-8") as f:
@@ -559,6 +560,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self, engine):
559560
expected_rows = ["x", "y", "z"]
560561
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
561562
with tm.ensure_clean("test.txt") as path:
563+
# TODO: Open in bytes mode for pyarrow
562564
with open(path, "w", newline="", encoding="utf-8") as f:
563565
f.write("manual header\n")
564566
df.to_csv(f, header=None, index=None, engine=engine)

0 commit comments

Comments
 (0)