Skip to content

Commit 54ea0cd

Browse files
TST: Fix test_parquet failures for pyarrow 1.0 (#35814) (#35887)
Co-authored-by: Ali McMaster <[email protected]>
1 parent 4ed1492 commit 54ea0cd

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

pandas/tests/io/test_parquet.py

+16 -6
Original file line number | Diff line number | Diff line change
@@ -557,13 +557,23 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa):
557557
@pytest.mark.parametrize("partition_col", [["A"], []])
558558
def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
559559
# GH #26388
560-
# https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
561-
# As per pyarrow partitioned columns become 'categorical' dtypes
562-
# and are added to back of dataframe on read
563-
564560
expected_df = df_compat.copy()
565-
if partition_col:
566-
expected_df[partition_col] = expected_df[partition_col].astype("category")
561+
562+
# GH #35791
563+
# read_table uses the new Arrow Datasets API since pyarrow 1.0.0
564+
# Previous behaviour was pyarrow partitioned columns become 'category' dtypes
565+
# These are added to back of dataframe on read. In new API category dtype is
566+
# only used if partition field is string.
567+
legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
568+
if partition_col and legacy_read_table:
569+
partition_col_type = "category"
570+
else:
571+
partition_col_type = "int32"
572+
573+
expected_df[partition_col] = expected_df[partition_col].astype(
574+
partition_col_type
575+
)
576+
567577
check_round_trip(
568578
df_compat,
569579
pa,

0 commit comments

Comments
 (0)