Backport PR #37304: TST: correct parquet test expected partition column dtype for pyarrow 2.0 (#37308)

meeseeksmachine · jorisvandenbossche · web-flow · commit 10717de6f166 · 2020-10-21T14:20:25.000+01:00
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -563,16 +563,20 @@ def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
         # read_table uses the new Arrow Datasets API since pyarrow 1.0.0
         # Previous behaviour was pyarrow partitioned columns become 'category' dtypes
         # These are added to back of dataframe on read. In new API category dtype is
-        # only used if partition field is string.
-        legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
-        if partition_col and legacy_read_table:
-            partition_col_type = "category"
-        else:
-            partition_col_type = "int32"
-
-        expected_df[partition_col] = expected_df[partition_col].astype(
-            partition_col_type
+        # only used if partition field is string, but this changed again to use
+        # category dtype for all types (not only strings) in pyarrow 2.0.0
+        pa10 = (LooseVersion(pyarrow.__version__) >= LooseVersion("1.0.0")) and (
+            LooseVersion(pyarrow.__version__) < LooseVersion("2.0.0")
         )
+        if partition_col:
+            if pa10:
+                partition_col_type = "int32"
+            else:
+                partition_col_type = "category"
+
+            expected_df[partition_col] = expected_df[partition_col].astype(
+                partition_col_type
+            )
 
         check_round_trip(
             df_compat,