diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
index c501c06b93813..163934bee509c 100644
--- a/pandas/compat/pyarrow.py
+++ b/pandas/compat/pyarrow.py
@@ -19,6 +19,7 @@
     pa_version_under17p0 = _palv < Version("17.0.0")
     pa_version_under18p0 = _palv < Version("18.0.0")
     pa_version_under19p0 = _palv < Version("19.0.0")
+    pa_version_under20p0 = _palv < Version("20.0.0")
     HAS_PYARROW = True
 except ImportError:
     pa_version_under10p1 = True
@@ -32,4 +33,5 @@
     pa_version_under17p0 = True
     pa_version_under18p0 = True
     pa_version_under19p0 = True
+    pa_version_under20p0 = True
     HAS_PYARROW = False
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 56a8e4c439164..78f39b649cb9a 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -18,6 +18,7 @@
     pa_version_under15p0,
     pa_version_under17p0,
     pa_version_under19p0,
+    pa_version_under20p0,
 )

 import pandas as pd
@@ -1075,27 +1076,34 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa):
             expected=expected,
         )

-    @pytest.mark.xfail(
-        pa_version_under17p0, reason="pa.pandas_compat passes 'datetime64' to .astype"
+    @pytest.mark.parametrize(
+        "columns",
+        [
+            [0, 1],
+            pytest.param(
+                [b"foo", b"bar"],
+                marks=pytest.mark.xfail(
+                    pa_version_under20p0,
+                    raises=NotImplementedError,
+                    reason="https://github.com/apache/arrow/pull/44171",
+                ),
+            ),
+            pytest.param(
+                [
+                    datetime.datetime(2011, 1, 1, 0, 0),
+                    datetime.datetime(2011, 1, 1, 1, 1),
+                ],
+                marks=pytest.mark.xfail(
+                    pa_version_under17p0,
+                    reason="pa.pandas_compat passes 'datetime64' to .astype",
+                ),
+            ),
+        ],
     )
-    def test_columns_dtypes_not_invalid(self, pa):
+    def test_columns_dtypes_not_invalid(self, pa, columns):
         df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))})
-        # numeric
-        df.columns = [0, 1]
-        check_round_trip(df, pa)
-
-        # bytes
-        df.columns = [b"foo", b"bar"]
-        with pytest.raises(NotImplementedError, match="|S3"):
-            # Bytes fails on read_parquet
-            check_round_trip(df, pa)
-
-        # python object
-        df.columns = [
-            datetime.datetime(2011, 1, 1, 0, 0),
-            datetime.datetime(2011, 1, 1, 1, 1),
-        ]
+        df.columns = columns
         check_round_trip(df, pa)

     def test_empty_columns(self, pa):