
Commit d3d74c5

TST: Fix test_parquet failures for pyarrow 1.0 (#35814)
1 parent 8607a93 commit d3d74c5

1 file changed: +15 -8 lines changed

pandas/tests/io/test_parquet.py

@@ -565,15 +565,22 @@ def test_s3_roundtrip(self, df_compat, s3_resource, pa):
     @pytest.mark.parametrize("partition_col", [["A"], []])
     def test_s3_roundtrip_for_dir(self, df_compat, s3_resource, pa, partition_col):
         # GH #26388
-        # https://github.com/apache/arrow/blob/master/python/pyarrow/tests/test_parquet.py#L2716
-        # As per pyarrow partitioned columns become 'categorical' dtypes
-        # and are added to back of dataframe on read
-        if partition_col and pd.compat.is_platform_windows():
-            pytest.skip("pyarrow/win incompatibility #35791")
-
         expected_df = df_compat.copy()
-        if partition_col:
-            expected_df[partition_col] = expected_df[partition_col].astype("category")
+
+        # GH #35791
+        # read_table uses the new Arrow Datasets API since pyarrow 1.0.0
+        # Previous behaviour was pyarrow partitioned columns become 'category' dtypes
+        # These are added to back of dataframe on read. In new API category dtype is
+        # only used if partition field is string.
+        legacy_read_table = LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0")
+        if partition_col and legacy_read_table:
+            partition_col_type = "category"
+        else:
+            partition_col_type = "int32"
+
+        expected_df[partition_col] = expected_df[partition_col].astype(
+            partition_col_type
+        )

         check_round_trip(
             df_compat,

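To see the dtype difference the updated test encodes, here is a minimal standalone sketch (not part of the commit). It assumes only that pandas and pyarrow are installed, round-trips a partitioned dataset through a local temporary directory instead of the S3 bucket used by the real test, and the expected_partition_dtype helper is hypothetical, simply mirroring the version gate added in this commit.

# Minimal sketch: round-trip a partitioned parquet dataset and inspect the
# dtype of the partition column. A local directory stands in for the S3
# bucket used by the real test.
import tempfile
from distutils.version import LooseVersion

import pandas as pd
import pyarrow


def expected_partition_dtype() -> str:
    # Hypothetical helper mirroring the commit's logic: legacy read_table
    # (pyarrow < 1.0.0) returned partitioned columns as "category"; the
    # Datasets API used from 1.0.0 keeps an integer partition field as int32.
    if LooseVersion(pyarrow.__version__) < LooseVersion("1.0.0"):
        return "category"
    return "int32"


df = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})

with tempfile.TemporaryDirectory() as path:
    # Partitioning on "A" writes one sub-directory per value of A.
    df.to_parquet(path, engine="pyarrow", partition_cols=["A"])
    result = pd.read_parquet(path, engine="pyarrow")
    # Partitioned columns are appended at the back of the frame on read.
    print(result.dtypes)
    print("expected dtype for 'A':", expected_partition_dtype())

With a legacy pyarrow the partition column should come back as category, and as int32 under the Datasets API, which is exactly the branch the updated test asserts.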