@@ -2,6 +2,7 @@
 import datetime
 from distutils.version import LooseVersion
 import locale
+from io import BytesIO
 import os
 from warnings import catch_warnings
 
|
@@ -494,6 +495,50 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa):
         # GH #19134
         check_round_trip(df_compat, pa, path="s3://pandas-test/pyarrow.parquet")
 
+    @td.skip_if_no("s3fs")
+    @pytest.mark.parametrize("partition_col", [["A"], []])
+    def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
+        from pandas.io.s3 import get_fs as get_s3_fs
+
+        # GH #26388
+        # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
+        # As per pyarrow, partitioned columns become 'categorical' dtypes
+        # and are added to the back of the dataframe on read
+
+        expected_df = df_compat.copy()
+        if partition_col:
+            expected_df[partition_col] = expected_df[partition_col].astype("category")
+        check_round_trip(
+            df_compat,
+            pa,
+            expected=expected_df,
+            path="s3://pandas-test/parquet_dir",
+            write_kwargs={
+                "partition_cols": partition_col,
+                "compression": None,
+                "filesystem": get_s3_fs(),
+            },
+            check_like=True,
+            repeat=1,
+        )
+
+    @tm.network
+    @td.skip_if_no("pyarrow")
+    def test_parquet_read_from_url(self, df_compat):
+        url = (
+            "https://raw.githubusercontent.com/pandas-dev/pandas/"
+            "master/pandas/tests/io/data/parquet/simple.parquet"
+        )
+        df = pd.read_parquet(url)
+        tm.assert_frame_equal(df, df_compat)
+
+    @td.skip_if_no("pyarrow")
+    def test_read_file_like_obj_support(self, df_compat):
+        buffer = BytesIO()
+        df_compat.to_parquet(buffer)
+        df_from_buf = pd.read_parquet(buffer)
+        tm.assert_frame_equal(df_compat, df_from_buf)
+
     def test_partition_cols_supported(self, pa, df_full):
         # GH #23283
         partition_cols = ["bool", "int"]