Skip to content

Commit 1c450d7

Browse files
TST: correct parquet test expected partition column dtype for pyarrow 2.0 (#37304)
1 parent c23ff03 commit 1c450d7

File tree

1 file changed

+13 -9 lines changed

1 file changed

+13 -9 lines changed

pandas/tests/io/test_parquet.py

+13 -9
Original file line numberDiff line numberDiff line change
@@ -614,16 +614,20 @@ def test_s3_roundtrip_for_dir(
614614
# read_table uses the new Arrow Datasets API since pyarrow 1.0.0
615615
# Previous behaviour was pyarrow partitioned columns become 'category' dtypes
616616
# These are added to back of dataframe on read. In new API category dtype is
617-
# only used if partition field is string.
618-
legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
619-
if partition_col and legacy_read_table:
620-
partition_col_type = "category"
621-
else:
622-
partition_col_type = "int32"
623-
624-
expected_df[partition_col] = expected_df[partition_col].astype(
625-
partition_col_type
617+
# only used if partition field is string, but this changed again to use
618+
# category dtype for all types (not only strings) in pyarrow 2.0.0
619+
pa10 = (LooseVersion(pyarrow.__version__) >= LooseVersion("1.0.0")) and (
620+
LooseVersion(pyarrow.__version__) < LooseVersion("2.0.0")
626621
)
622+
if partition_col:
623+
if pa10:
624+
partition_col_type = "int32"
625+
else:
626+
partition_col_type = "category"
627+
628+
expected_df[partition_col] = expected_df[partition_col].astype(
629+
partition_col_type
630+
)
627631

628632
check_round_trip(
629633
df_compat,

0 commit comments

Comments
 (0)