diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3a3ba99484a3a..4e0c16c71a6a8 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -565,15 +565,22 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa): @pytest.mark.parametrize("partition_col", [["A"], []]) def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col): # GH #26388 - # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716 - # As per pyarrow partitioned columns become 'categorical' dtypes - # and are added to back of dataframe on read - if partition_col and pd.compat.is_platform_windows(): - pytest.skip("pyarrow/win incompatibility #35791") - expected_df = df_compat.copy() - if partition_col: - expected_df[partition_col] = expected_df[partition_col].astype("category") + + # GH #35791 + # read_table uses the new Arrow Datasets API since pyarrow 1.0.0 + # Previous behaviour was pyarrow partitioned columns become 'category' dtypes + # These are added to back of dataframe on read. In new API category dtype is + # only used if partition field is string. + legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0") + if partition_col and legacy_read_table: + partition_col_type = "category" + else: + partition_col_type = "int32" + + expected_df[partition_col] = expected_df[partition_col].astype( + partition_col_type + ) check_round_trip( df_compat,