@@ -103,19 +103,27 @@ def __init__(self):
103
103
self .api = pyarrow
104
104
105
105
def write (self , df , path , compression = 'snappy' ,
106
- coerce_timestamps = 'ms' , ** kwargs ):
106
+ coerce_timestamps = 'ms' , index = None , ** kwargs ):
107
107
self .validate_dataframe (df )
108
- if self ._pyarrow_lt_070 :
108
+
109
+ # Only validate the index if we're writing it.
110
+ if self ._pyarrow_lt_070 and index is not False :
109
111
self ._validate_write_lt_070 (df )
110
112
path , _ , _ , _ = get_filepath_or_buffer (path , mode = 'wb' )
111
113
114
+ if index is None :
115
+ from_pandas_kwargs = {}
116
+ else :
117
+ from_pandas_kwargs = {'preserve_index' : index }
118
+
112
119
if self ._pyarrow_lt_060 :
113
- table = self .api .Table .from_pandas (df , timestamps_to_ms = True )
120
+ table = self .api .Table .from_pandas (df , timestamps_to_ms = True ,
121
+ ** from_pandas_kwargs )
114
122
self .api .parquet .write_table (
115
123
table , path , compression = compression , ** kwargs )
116
124
117
125
else :
118
- table = self .api .Table .from_pandas (df )
126
+ table = self .api .Table .from_pandas (df , ** from_pandas_kwargs )
119
127
self .api .parquet .write_table (
120
128
table , path , compression = compression ,
121
129
coerce_timestamps = coerce_timestamps , ** kwargs )
@@ -197,7 +205,7 @@ def __init__(self):
197
205
)
198
206
self .api = fastparquet
199
207
200
- def write (self , df , path , compression = 'snappy' , ** kwargs ):
208
+ def write (self , df , path , compression = 'snappy' , index = None , ** kwargs ):
201
209
self .validate_dataframe (df )
202
210
# thriftpy/protocol/compact.py:339:
203
211
# DeprecationWarning: tostring() is deprecated.
@@ -214,8 +222,8 @@ def write(self, df, path, compression='snappy', **kwargs):
214
222
path , _ , _ , _ = get_filepath_or_buffer (path )
215
223
216
224
with catch_warnings (record = True ):
217
- self .api .write (path , df ,
218
- compression = compression , ** kwargs )
225
+ self .api .write (path , df , compression = compression ,
226
+ write_index = index , ** kwargs )
219
227
220
228
def read (self , path , columns = None , ** kwargs ):
221
229
if is_s3_url (path ):
@@ -234,7 +242,8 @@ def read(self, path, columns=None, **kwargs):
234
242
return parquet_file .to_pandas (columns = columns , ** kwargs )
235
243
236
244
237
- def to_parquet (df , path , engine = 'auto' , compression = 'snappy' , ** kwargs ):
245
+ def to_parquet (df , path , engine = 'auto' , compression = 'snappy' , index = None ,
246
+ ** kwargs ):
238
247
"""
239
248
Write a DataFrame to the parquet format.
240
249
@@ -250,11 +259,17 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
250
259
'pyarrow' is unavailable.
251
260
compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
252
261
Name of the compression to use. Use ``None`` for no compression.
262
+ index : bool, default None
263
+ If ``True``, include the dataframe's index(es) in the file output. If
264
+ ``False``, they will not be written to the file. If ``None``, the
265
+ engine's default behavior will be used.
266
+
267
+ .. versionadded:: 0.24.0
253
268
kwargs
254
269
Additional keyword arguments passed to the engine
255
270
"""
256
271
impl = get_engine (engine )
257
- return impl .write (df , path , compression = compression , ** kwargs )
272
+ return impl .write (df , path , compression = compression , index = index , ** kwargs )
258
273
259
274
260
275
def read_parquet (path , engine = 'auto' , columns = None , ** kwargs ):
0 commit comments