TST: Fix test_parquet failures for pyarrow 1.0 (#35814) (#35887)

jorisvandenbossche · alimcmaster1 · web-flow · commit 54ea0cd9b23f · 2020-08-25T11:04:01.000+01:00
Co-authored-by: Ali McMaster <alimcmaster1@gmail.com>
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
@@ -557,13 +557,23 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa):
     @pytest.mark.parametrize("partition_col", [["A"], []])
     def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
         # GH #26388
-        # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
-        # As per pyarrow partitioned columns become 'categorical' dtypes
-        # and are added to back of dataframe on read
-
         expected_df = df_compat.copy()
-        if partition_col:
-            expected_df[partition_col] = expected_df[partition_col].astype("category")
+
+        # GH #35791
+        # read_table uses the new Arrow Datasets API since pyarrow 1.0.0
+        # Previously, pyarrow partitioned columns became 'category' dtypes.
+        # These are added to the back of the dataframe on read. In the new API,
+        # category dtype is only used if the partition field is a string.
+        legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
+        if partition_col and legacy_read_table:
+            partition_col_type = "category"
+        else:
+            partition_col_type = "int32"
+
+        expected_df[partition_col] = expected_df[partition_col].astype(
+            partition_col_type
+        )
+
         check_round_trip(
             df_compat,
             pa,