20
20
import numpy as np
21
21
22
22
from pandas ._libs import writers as libwriters
23
+ from pandas .compat ._optional import import_optional_dependency
23
24
from pandas .util ._decorators import cache_readonly
24
25
25
26
from pandas .core .dtypes .generic import (
@@ -57,6 +58,7 @@ def __init__(
57
58
self ,
58
59
formatter : DataFrameFormatter ,
59
60
path_or_buf : FilePath | WriteBuffer [str ] | WriteBuffer [bytes ] = "" ,
61
+ engine : str = "python" ,
60
62
sep : str = "," ,
61
63
cols : Sequence [Hashable ] | None = None ,
62
64
index_label : IndexLabel | None = None ,
@@ -78,6 +80,7 @@ def __init__(
78
80
self .obj = self .fmt .frame
79
81
80
82
self .filepath_or_buffer = path_or_buf
83
+ self .engine = engine
81
84
self .encoding = encoding
82
85
self .compression : CompressionOptions = compression
83
86
self .mode = mode
@@ -252,22 +255,58 @@ def save(self) -> None:
252
255
storage_options = self .storage_options ,
253
256
) as handles :
254
257
# Note: self.encoding is irrelevant here
258
+ self ._save (handles .handle )
259
+
260
def _save_pyarrow(self, handle) -> None:
    """
    Serialize ``self.obj`` to CSV on ``handle`` using ``pyarrow.csv``.

    Parameters
    ----------
    handle : file-like
        The already-opened output handle. Handles deriving from
        ``io.TextIOBase`` receive the decoded text; any other handle is
        handed to pyarrow directly and therefore must accept ``bytes``.

    Raises
    ------
    ValueError
        If ``self.quoting`` has no pyarrow equivalent.

    Notes
    -----
    Any error raised by ``import_optional_dependency`` (e.g. pyarrow not
    being installed) propagates to the caller.
    """
    # Function-scope import: only needed to tell text handles from binary.
    import io

    # Map the csv-module quoting constants to pyarrow quoting styles.
    # Validated first so an unsupported option fails before the frame is
    # converted to an Arrow table.
    pa_quoting_styles = {
        csvlib.QUOTE_MINIMAL: "needed",
        # TODO: Is this a 1-1 mapping?
        # This doesn't quote nulls, check if Python does this
        csvlib.QUOTE_ALL: "all_valid",
        csvlib.QUOTE_NONE: "none",
    }
    pa_quoting = pa_quoting_styles.get(self.quoting)
    if pa_quoting is None:
        raise ValueError(
            f"Quoting option {self.quoting} is not supported with engine='pyarrow'"
        )

    pa = import_optional_dependency("pyarrow")
    pa_csv = import_optional_dependency("pyarrow.csv")

    # Convert index to column and rename name to empty string
    # since we serialize the index as basically a column with no name
    # TODO: this won't work for multi-indexes
    # NOTE(review): the index is always written here; confirm how an
    # ``index=False`` request is expected to reach this engine.
    obj = self.obj.reset_index(names=[""])
    table = pa.Table.from_pandas(obj)

    write_options = pa_csv.WriteOptions(
        include_header=self._need_to_save_header,
        batch_size=self.chunksize,
        delimiter=self.sep,
        quoting_style=pa_quoting,
    )

    # Write through the handle we were given rather than re-opening
    # ``self.filepath_or_buffer``, so that whatever wraps the handle
    # (text encoding, compression, ...) is honoured.
    if isinstance(handle, io.TextIOBase):
        # pyarrow emits bytes; buffer them and decode for text handles.
        sink = pa.BufferOutputStream()
        pa_csv.write_csv(table, sink, write_options)
        handle.write(sink.getvalue().to_pybytes().decode("utf-8"))
    else:
        pa_csv.write_csv(table, handle, write_options)
293
+
294
def _save(self, handle) -> None:
    """
    Write the CSV output to ``handle`` with the configured engine.

    Parameters
    ----------
    handle : file-like
        The already-opened output handle.

    Notes
    -----
    With ``self.engine == "pyarrow"`` the whole write is delegated to
    ``self._save_pyarrow``. For any other engine value a ``csv.writer``
    is created on ``handle`` and stored in ``self.writer``, then the
    header (when ``self._need_to_save_header`` is truthy) and the body
    are written.
    """
    if self.engine == "pyarrow":
        self._save_pyarrow(handle)
        return

    # The writer is kept on the instance after this method returns.
    self.writer = csvlib.writer(
        handle,
        lineterminator=self.lineterminator,
        delimiter=self.sep,
        quoting=self.quoting,
        doublequote=self.doublequote,
        escapechar=self.escapechar,
        quotechar=self.quotechar,
    )

    if self._need_to_save_header:
        self._save_header()
    self._save_body()
271
310
272
311
def _save_header (self ) -> None :
273
312
if not self .has_mi_columns or self ._has_aliases :
0 commit comments