@@ -204,6 +204,22 @@ def test_cross_engine_fp_pa(df_cross_compat, pa, fp):
             tm.assert_frame_equal(result, df[['a', 'd']])
 
 
+def check_round_trip_equals(df, path, engine,
+                            write_kwargs, read_kwargs,
+                            expected, check_names):
+
+    df.to_parquet(path, engine, **write_kwargs)
+    actual = read_parquet(path, engine, **read_kwargs)
+    tm.assert_frame_equal(expected, actual,
+                          check_names=check_names)
+
+    # repeat
+    df.to_parquet(path, engine, **write_kwargs)
+    actual = read_parquet(path, engine, **read_kwargs)
+    tm.assert_frame_equal(expected, actual,
+                          check_names=check_names)
+
+
 class Base(object):
 
     def check_error_on_write(self, df, engine, exc):
@@ -212,28 +228,32 @@ def check_error_on_write(self, df, engine, exc):
         with tm.ensure_clean() as path:
             to_parquet(df, path, engine, compression=None)
 
-    def check_round_trip(self, df, engine, expected=None,
+    def check_round_trip(self, df, engine, expected=None, path=None,
                          write_kwargs=None, read_kwargs=None,
                          check_names=True):
+
         if write_kwargs is None:
-            write_kwargs = {}
+            write_kwargs = {'compression': None}
+
         if read_kwargs is None:
             read_kwargs = {}
-        with tm.ensure_clean() as path:
-            df.to_parquet(path, engine, **write_kwargs)
-            result = read_parquet(path, engine, **read_kwargs)
 
-            if expected is None:
-                expected = df
-            tm.assert_frame_equal(result, expected, check_names=check_names)
-
-            # repeat
-            to_parquet(df, path, engine, **write_kwargs)
-            result = pd.read_parquet(path, engine, **read_kwargs)
+        if expected is None:
+            expected = df
 
-            if expected is None:
-                expected = df
-            tm.assert_frame_equal(result, expected, check_names=check_names)
+        if path is None:
+            with tm.ensure_clean() as path:
+                check_round_trip_equals(df, path, engine,
+                                        write_kwargs=write_kwargs,
+                                        read_kwargs=read_kwargs,
+                                        expected=expected,
+                                        check_names=check_names)
+        else:
+            check_round_trip_equals(df, path, engine,
+                                    write_kwargs=write_kwargs,
+                                    read_kwargs=read_kwargs,
+                                    expected=expected,
+                                    check_names=check_names)
 
 
 class TestBasic(Base):
@@ -251,7 +271,7 @@ def test_columns_dtypes(self, engine):
 
         # unicode
         df.columns = [u'foo', u'bar']
-        self.check_round_trip(df, engine, write_kwargs={'compression': None})
+        self.check_round_trip(df, engine)
 
     def test_columns_dtypes_invalid(self, engine):
 
@@ -292,7 +312,6 @@ def test_read_columns(self, engine):
 
         expected = pd.DataFrame({'string': list('abc')})
         self.check_round_trip(df, engine, expected=expected,
-                              write_kwargs={'compression': None},
                               read_kwargs={'columns': ['string']})
 
     def test_write_index(self, engine):
@@ -304,7 +323,7 @@ def test_write_index(self, engine):
             pytest.skip("pyarrow is < 0.7.0")
 
         df = pd.DataFrame({'A': [1, 2, 3]})
-        self.check_round_trip(df, engine, write_kwargs={'compression': None})
+        self.check_round_trip(df, engine)
 
         indexes = [
             [2, 3, 4],
@@ -315,15 +334,12 @@ def test_write_index(self, engine):
         # non-default index
         for index in indexes:
             df.index = index
-            self.check_round_trip(
-                df, engine,
-                write_kwargs={'compression': None},
-                check_names=check_names)
+            self.check_round_trip(df, engine, check_names=check_names)
 
         # index with meta-data
         df.index = [0, 1, 2]
         df.index.name = 'foo'
-        self.check_round_trip(df, engine, write_kwargs={'compression': None})
+        self.check_round_trip(df, engine)
 
     def test_write_multiindex(self, pa_ge_070):
         # Not suppoprted in fastparquet as of 0.1.3 or older pyarrow version
@@ -332,7 +348,7 @@ def test_write_multiindex(self, pa_ge_070):
         df = pd.DataFrame({'A': [1, 2, 3]})
         index = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)])
         df.index = index
-        self.check_round_trip(df, engine, write_kwargs={'compression': None})
+        self.check_round_trip(df, engine)
 
     def test_write_column_multiindex(self, engine):
         # column multi-index
@@ -426,6 +442,11 @@ def test_categorical_unsupported(self, pa_lt_070):
         df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
         self.check_error_on_write(df, pa, NotImplementedError)
 
+    def test_s3_roundtrip(self, df_compat, s3_resource, pa):
+        # GH #19134
+        self.check_round_trip(df_compat, pa,
+                              path='s3://pandas-test/pyarrow.parquet')
+
 
 class TestParquetFastParquet(Base):
 
@@ -436,7 +457,7 @@ def test_basic(self, fp, df_full):
         # additional supported types for fastparquet
        df['timedelta'] = pd.timedelta_range('1 day', periods=3)
 
-        self.check_round_trip(df, fp, write_kwargs={'compression': None})
+        self.check_round_trip(df, fp)
 
     @pytest.mark.skip(reason="not supported")
     def test_duplicate_columns(self, fp):
@@ -449,8 +470,7 @@ def test_duplicate_columns(self, fp):
     def test_bool_with_none(self, fp):
         df = pd.DataFrame({'a': [True, None, False]})
         expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16')
-        self.check_round_trip(df, fp, expected=expected,
-                              write_kwargs={'compression': None})
+        self.check_round_trip(df, fp, expected=expected)
 
     def test_unsupported(self, fp):
 
@@ -466,7 +486,7 @@ def test_categorical(self, fp):
         if LooseVersion(fastparquet.__version__) < LooseVersion("0.1.3"):
             pytest.skip("CategoricalDtype not supported for older fp")
         df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
-        self.check_round_trip(df, fp, write_kwargs={'compression': None})
+        self.check_round_trip(df, fp)
 
     def test_datetime_tz(self, fp):
         # doesn't preserve tz
@@ -475,8 +495,7 @@ def test_datetime_tz(self, fp):
 
         # warns on the coercion
         with catch_warnings(record=True):
-            self.check_round_trip(df, fp, df.astype('datetime64[ns]'),
-                                  write_kwargs={'compression': None})
+            self.check_round_trip(df, fp, df.astype('datetime64[ns]'))
 
     def test_filter_row_groups(self, fp):
         d = {'a': list(range(0, 3))}
@@ -486,3 +505,8 @@ def test_filter_row_groups(self, fp):
                           row_group_offsets=1)
             result = read_parquet(path, fp, filters=[('a', '==', 0)])
             assert len(result) == 1
+
+    def test_s3_roundtrip(self, df_compat, s3_resource, fp):
+        # GH #19134
+        self.check_round_trip(df_compat, fp,
+                              path='s3://pandas-test/fastparquet.parquet')
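
For reference, here is a minimal standalone sketch of the write/read/compare-then-repeat round trip that the new check_round_trip_equals helper performs, using only a local temporary file and the public pandas API. The s3://pandas-test/... paths exercised above depend on the test suite's s3_resource fixture (a mocked bucket), which is not reproduced here; the function and variable names below are illustrative, not part of the patch.

import os
import tempfile

import pandas as pd
import pandas.testing as tm


def roundtrip_frame(df, engine="pyarrow"):
    # Write the frame to a temporary parquet file, read it back, and compare;
    # then repeat, so overwriting the same path is also covered.
    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "roundtrip.parquet")
        for _ in range(2):
            df.to_parquet(path, engine=engine, compression=None)
            result = pd.read_parquet(path, engine=engine)
            tm.assert_frame_equal(result, df)


if __name__ == "__main__":
    roundtrip_frame(pd.DataFrame({"a": [1, 2, 3], "b": list("xyz")}))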