
Commit d3d74c5

TST: Fix test_parquet failures for pyarrow 1.0 (#35814)
1 parent 8607a93 commit d3d74c5

1 file changed: +15 -8 lines changed

pandas/tests/io/test_parquet.py

@@ -565,15 +565,22 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa):
     @pytest.mark.parametrize("partition_col", [["A"], []])
     def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
         # GH #26388
-        # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
-        # As per pyarrow partitioned columns become 'categorical' dtypes
-        # and are added to back of dataframe on read
-        if partition_col and pd.compat.is_platform_windows():
-            pytest.skip("pyarrow/win incompatibility #35791")
-
         expected_df = df_compat.copy()
-        if partition_col:
-            expected_df[partition_col] = expected_df[partition_col].astype("category")
+
+        # GH #35791
+        # read_table uses the new Arrow Datasets API since pyarrow 1.0.0
+        # Previous behaviour was pyarrow partitioned columns become 'category' dtypes
+        # These are added to back of dataframe on read. In new API category dtype is
+        # only used if partition field is string.
+        legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
+        if partition_col and legacy_read_table:
+            partition_col_type = "category"
+        else:
+            partition_col_type = "int32"
+
+        expected_df[partition_col] = expected_df[partition_col].astype(
+            partition_col_type
+        )

         check_round_trip(
             df_compat,

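To see the dtype difference the updated test encodes, here is a minimal standalone sketch (not part of the commit). It assumes only that pandas and pyarrow are installed, round-trips a partitioned dataset through a local temporary directory instead of the S3 bucket used by the real test, and the expected_partition_dtype helper is hypothetical, simply mirroring the version gate added in this commit.

# Minimal sketch: round-trip a partitioned parquet dataset and inspect the
# dtype of the partition column. A local directory stands in for the S3
# bucket used by the real test.
import tempfile
from distutils.version import LooseVersion

import pandas as pd
import pyarrow


def expected_partition_dtype() -> str:
    # Hypothetical helper mirroring the commit's logic: legacy read_table
    # (pyarrow < 1.0.0) returned partitioned columns as "category"; the
    # Datasets API used from 1.0.0 keeps an integer partition field as int32.
    if LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0"):
        return "category"
    return "int32"


df = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})

with tempfile.TemporaryDirectory() as path:
    # Partitioning on "A" writes one sub-directory per value of A.
    df.to_parquet(path, engine="pyarrow", partition_cols=["A"])
    result = pd.read_parquet(path, engine="pyarrow")
    # Partitioned columns are appended at the back of the frame on read.
    print(result.dtypes)
    print("expected dtype for 'A':", expected_partition_dtype())

With a legacy pyarrow the partition column should come back as category, and as int32 under the Datasets API, which is exactly the branch the updated test asserts.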