diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 002271ead1e38..8d3d4cc347019 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -614,16 +614,20 @@ def test_s3_roundtrip_for_dir( # read_table uses the new Arrow Datasets API since pyarrow 1.0.0 # Previous behaviour was pyarrow partitioned columns become 'category' dtypes # These are added to back of dataframe on read. In new API category dtype is - # only used if partition field is string. - legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0") - if partition_col and legacy_read_table: - partition_col_type = "category" - else: - partition_col_type = "int32" - - expected_df[partition_col] = expected_df[partition_col].astype( - partition_col_type + # only used if partition field is string, but this changed again to use + # category dtype for all types (not only strings) in pyarrow 2.0.0 + pa10 = (LooseVersion(pyarrow.__version__) >= LooseVersion("1.0.0")) and ( + LooseVersion(pyarrow.__version__) < LooseVersion("2.0.0") ) + if partition_col: + if pa10: + partition_col_type = "int32" + else: + partition_col_type = "category" + + expected_df[partition_col] = expected_df[partition_col].astype( + partition_col_type + ) check_round_trip( df_compat,