From c8c5dccf36f650644f7f10bd819c769eac26ea95 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 21 Jan 2022 13:01:16 +0100 Subject: [PATCH] REGR: fix period dtype <-> arrow roundtrip for pyarrow < 4 --- pandas/core/internals/api.py | 7 +++++++ pandas/tests/arrays/period/test_arrow_compat.py | 11 ----------- pandas/tests/io/test_feather.py | 8 +------- pandas/tests/io/test_parquet.py | 12 +----------- 4 files changed, 9 insertions(+), 29 deletions(-) diff --git a/pandas/core/internals/api.py b/pandas/core/internals/api.py index 537ae8f2a4320..371f1e2443b36 100644 --- a/pandas/core/internals/api.py +++ b/pandas/core/internals/api.py @@ -24,6 +24,7 @@ from pandas.core.internals.blocks import ( Block, DatetimeTZBlock, + ExtensionBlock, check_ndim, ensure_block_shape, extract_pandas_array, @@ -51,6 +52,12 @@ def make_block( values, dtype = extract_pandas_array(values, dtype, ndim) + if klass is ExtensionBlock and is_period_dtype(values.dtype): + # GH-44681 changed PeriodArray to be stored in the 2D + # NDArrayBackedExtensionBlock instead of ExtensionBlock + # -> still allow ExtensionBlock to be passed in this case for back compat + klass = None + if klass is None: dtype = dtype or values.dtype klass = get_block_type(dtype) diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index bc557f2e119d5..560299a4a47f5 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -1,7 +1,5 @@ import pytest -from pandas.compat import pa_version_under4p0 - from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd @@ -71,9 +69,6 @@ def test_arrow_array_missing(): assert result.storage.equals(expected) -@pytest.mark.xfail( - pa_version_under4p0, reason="pyarrow incorrectly uses pandas internals API" -) def test_arrow_table_roundtrip(): from pandas.core.arrays._arrow_utils import ArrowPeriodType @@ -93,9 +88,6 @@ def test_arrow_table_roundtrip(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail( - pa_version_under4p0, reason="pyarrow incorrectly uses pandas internals API" -) def test_arrow_load_from_zero_chunks(): # GH-41040 @@ -114,9 +106,6 @@ def test_arrow_load_from_zero_chunks(): tm.assert_frame_equal(result, df) -@pytest.mark.xfail( - pa_version_under4p0, reason="pyarrow incorrectly uses pandas internals API" -) def test_arrow_table_roundtrip_without_metadata(): arr = PeriodArray([1, 2, 3], freq="H") arr[1] = pd.NaT diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 4fcb43c1c3e47..df858070f698a 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -2,8 +2,6 @@ import numpy as np import pytest -from pandas.compat.pyarrow import pa_version_under4p0 - import pandas as pd import pandas._testing as tm @@ -87,11 +85,7 @@ def test_basic(self): ), } ) - if not pa_version_under4p0: - # older pyarrow incorrectly uses pandas internal API, so - # constructs invalid Block - df["periods"] = pd.period_range("2013", freq="M", periods=3) - + df["periods"] = pd.period_range("2013", freq="M", periods=3) df["timedeltas"] = pd.timedelta_range("1 day", periods=3) df["intervals"] = pd.interval_range(0, 3, 3) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index decdf02dd3072..b60ff00f9d59e 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -15,7 +15,6 @@ from pandas.compat.pyarrow import ( pa_version_under2p0, - pa_version_under4p0, pa_version_under5p0, pa_version_under6p0, ) @@ -652,13 +651,7 @@ def test_use_nullable_dtypes(self, engine, request): "object", "datetime64[ns, UTC]", "float", - pytest.param( - "period[D]", - marks=pytest.mark.xfail( - pa_version_under4p0, - reason="pyarrow uses pandas internal API incorrectly", - ), - ), + "period[D]", "Float64", "string", ], @@ -897,9 +890,6 @@ def test_pyarrow_backed_string_array(self, pa, string_storage): check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]")) @td.skip_if_no("pyarrow") - @pytest.mark.xfail( - pa_version_under4p0, reason="pyarrow uses pandas internal API incorrectly" - ) def test_additional_extension_types(self, pa): # test additional ExtensionArrays that are supported through the # __arrow_array__ protocol + by defining a custom ExtensionType