Skip to content

Commit b17884b

Browse files
alimcmaster1 authored and jorisvandenbossche committed
Backport Test Only from PR pandas-dev#34500 on branch 1.0.x (REG: Fix read_parquet from file-like objects)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 29edbab commit b17884b

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed
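Binary file (2.11 KB) not shown — presumably pandas/tests/io/data/parquet/simple.parquet, the fixture that test_parquet_read_from_url downloads.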

pandas/tests/io/test_parquet.py

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import datetime
33
from distutils.version import LooseVersion
44
import locale
5+
from io import BytesIO
56
import os
67
from warnings import catch_warnings
78

@@ -494,6 +495,50 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa):
494495
# GH #19134
495496
check_round_trip(df_compat, pa, path="s3://pandas-test/pyarrow.parquet")
496497

498+
@td.skip_if_no("s3fs")
499+
@pytest.mark.parametrize("partition_col", [["A"], []])
500+
def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
501+
from pandas.io.s3 import get_fs as get_s3_fs
502+
503+
# GH #26388
504+
# https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
505+
# As per pyarrow partitioned columns become 'categorical' dtypes
506+
# and are added to back of dataframe on read
507+
508+
expected_df = df_compat.copy()
509+
if partition_col:
510+
expected_df[partition_col] = expected_df[partition_col].astype("category")
511+
check_round_trip(
512+
df_compat,
513+
pa,
514+
expected=expected_df,
515+
path="s3://pandas-test/parquet_dir",
516+
write_kwargs={
517+
"partition_cols": partition_col,
518+
"compression": None,
519+
"filesystem": get_s3_fs(),
520+
},
521+
check_like=True,
522+
repeat=1,
523+
)
524+
525+
@tm.network
526+
@td.skip_if_no("pyarrow")
527+
def test_parquet_read_from_url(self, df_compat):
528+
url = (
529+
"https://raw.githubusercontent.com/pandas-dev/pandas/"
530+
"master/pandas/tests/io/data/parquet/simple.parquet"
531+
)
532+
df = pd.read_parquet(url)
533+
tm.assert_frame_equal(df, df_compat)
534+
535+
@td.skip_if_no("pyarrow")
536+
def test_read_file_like_obj_support(self, df_compat):
537+
buffer = BytesIO()
538+
df_compat.to_parquet(buffer)
539+
df_from_buf = pd.read_parquet(buffer)
540+
tm.assert_frame_equal(df_compat, df_from_buf)
541+
497542
def test_partition_cols_supported(self, pa, df_full):
498543
# GH #23283
499544
partition_cols = ["bool", "int"]

0 commit comments

Comments
 (0)