Skip to content

Commit 10717de

Browse files
Backport PR #37304: TST: correct parquet test expected partition column dtype for pyarrow 2.0 (#37308)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 9fe01dd commit 10717de

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

pandas/tests/io/test_parquet.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -563,16 +563,20 @@ def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
563563
# read_table uses the new Arrow Datasets API since pyarrow 1.0.0
564564
# Previous behaviour was pyarrow partitioned columns become 'category' dtypes
565565
# These are added to back of dataframe on read. In new API category dtype is
566-
# only used if partition field is string.
567-
legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
568-
if partition_col and legacy_read_table:
569-
partition_col_type = "category"
570-
else:
571-
partition_col_type = "int32"
572-
573-
expected_df[partition_col] = expected_df[partition_col].astype(
574-
partition_col_type
566+
# only used if partition field is string, but this changed again to use
567+
# category dtype for all types (not only strings) in pyarrow 2.0.0
568+
pa10 = (LooseVersion(pyarrow.__version__) >= LooseVersion("1.0.0")) and (
569+
LooseVersion(pyarrow.__version__) < LooseVersion("2.0.0")
575570
)
571+
if partition_col:
572+
if pa10:
573+
partition_col_type = "int32"
574+
else:
575+
partition_col_type = "category"
576+
577+
expected_df[partition_col] = expected_df[partition_col].astype(
578+
partition_col_type
579+
)
576580

577581
check_round_trip(
578582
df_compat,

0 commit comments

Comments
 (0)