From 2c984c272ec2bb0a8b9ea900dede2d0535137428 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 15 Jun 2022 18:08:33 -0700 Subject: [PATCH 01/11] ENH/TST: Add BaseInterfaceTests tests for ArrowExtensionArray --- pandas/core/arrays/arrow/array.py | 40 +++++++++++++++++++++++++++- pandas/core/arrays/string_arrow.py | 40 +--------------------------- pandas/tests/extension/test_arrow.py | 8 +++++- 3 files changed, 47 insertions(+), 41 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 1f35013075751..bfd615ff782f9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -31,6 +31,7 @@ ) from pandas.core.dtypes.missing import isna +from pandas.core.arraylike import OpsMixin from pandas.core.arrays.base import ExtensionArray from pandas.core.indexers import ( check_array_indexer, @@ -45,13 +46,22 @@ from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning from pandas.core.arrays.arrow.dtype import ArrowDtype + ARROW_CMP_FUNCS = { + "eq": pc.equal, + "ne": pc.not_equal, + "lt": pc.less, + "gt": pc.greater, + "le": pc.less_equal, + "ge": pc.greater_equal, + } + if TYPE_CHECKING: from pandas import Series ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray") -class ArrowExtensionArray(ExtensionArray): +class ArrowExtensionArray(OpsMixin, ExtensionArray): """ Base class for ExtensionArray backed by Arrow ChunkedArray. """ @@ -179,6 +189,34 @@ def __arrow_array__(self, type=None): """Convert myself to a pyarrow ChunkedArray.""" return self._data + def _cmp_method(self, other, op): + from pandas.arrays import BooleanArray + + pc_func = ARROW_CMP_FUNCS[op.__name__] + if isinstance(other, ArrowExtensionArray): + result = pc_func(self._data, other._data) + elif isinstance(other, (np.ndarray, list)): + result = pc_func(self._data, other) + elif is_scalar(other): + try: + result = pc_func(self._data, pa.scalar(other)) + except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid): + mask = isna(self) | isna(other) + valid = ~mask + result = np.zeros(len(self), dtype="bool") + result[valid] = op(np.array(self)[valid], other) + return BooleanArray(result, mask) + else: + return NotImplementedError( + f"{op.__name__} not implemented for {type(other)}" + ) + + if pa_version_under2p0: + result = result.to_pandas().values + else: + result = result.to_numpy() + return BooleanArray._from_sequence(result) + def equals(self, other) -> bool: if not isinstance(other, ArrowExtensionArray): return False diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index a07f748fa0c8c..c4d1a35315d7d 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -34,7 +34,6 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core.arraylike import OpsMixin from pandas.core.arrays.arrow import ArrowExtensionArray from pandas.core.arrays.boolean import BooleanDtype from pandas.core.arrays.integer import Int64Dtype @@ -51,15 +50,6 @@ from pandas.core.arrays.arrow._arrow_utils import fallback_performancewarning - ARROW_CMP_FUNCS = { - "eq": pc.equal, - "ne": pc.not_equal, - "lt": pc.less, - "gt": pc.greater, - "le": pc.less_equal, - "ge": pc.greater_equal, - } - ArrowStringScalarOrNAT = Union[str, libmissing.NAType] @@ -74,9 +64,7 @@ def _chk_pyarrow_available() -> None: # fallback for the ones that pyarrow doesn't yet support -class ArrowStringArray( - OpsMixin, ArrowExtensionArray, BaseStringArray, ObjectStringArrayMixin -): +class ArrowStringArray(ArrowExtensionArray, BaseStringArray, ObjectStringArrayMixin): """ Extension array for string data in a ``pyarrow.ChunkedArray``. @@ -190,32 +178,6 @@ def to_numpy( result[mask] = na_value return result - def _cmp_method(self, other, op): - from pandas.arrays import BooleanArray - - pc_func = ARROW_CMP_FUNCS[op.__name__] - if isinstance(other, ArrowStringArray): - result = pc_func(self._data, other._data) - elif isinstance(other, (np.ndarray, list)): - result = pc_func(self._data, other) - elif is_scalar(other): - try: - result = pc_func(self._data, pa.scalar(other)) - except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid): - mask = isna(self) | isna(other) - valid = ~mask - result = np.zeros(len(self), dtype="bool") - result[valid] = op(np.array(self)[valid], other) - return BooleanArray(result, mask) - else: - return NotImplemented - - if pa_version_under2p0: - result = result.to_pandas().values - else: - result = result.to_numpy() - return BooleanArray._from_sequence(result) - def insert(self, loc: int, item): if not isinstance(item, str) and item is not libmissing.NA: raise TypeError("Scalar must be NA or str") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 03616267c3f86..c7691a438c372 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -34,7 +34,7 @@ from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip -@pytest.fixture(params=tm.ALL_PYARROW_DTYPES) +@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str) def dtype(request): return ArrowDtype(pyarrow_dtype=request.param) @@ -201,6 +201,12 @@ class TestBaseIndex(base.BaseIndexTests): pass +class TestBaseInterface(base.BaseInterfaceTests): + @pytest.mark.xfail(reason="pyarrow.ChunkedArray does not support views.") + def test_view(self, data): + super().test_view(data) + + def test_arrowdtype_construct_from_string_type_with_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): ArrowDtype.construct_from_string("timestamp[s][pyarrow]") From a766ed11eb5b958a1bde24c7b5b8c5e3dec1d0c0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 15 Jun 2022 20:50:32 -0700 Subject: [PATCH 02/11] Fix mock ArrowExtensionArray --- pandas/tests/extension/arrow/arrays.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 22595c4e461d7..26b94ebe5a8da 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -23,7 +23,6 @@ take, ) from pandas.api.types import is_scalar -from pandas.core.arraylike import OpsMixin from pandas.core.arrays.arrow import ArrowExtensionArray as _ArrowExtensionArray from pandas.core.construction import extract_array @@ -72,7 +71,7 @@ def construct_array_type(cls) -> type_t[ArrowStringArray]: return ArrowStringArray -class ArrowExtensionArray(OpsMixin, _ArrowExtensionArray): +class ArrowExtensionArray(_ArrowExtensionArray): _data: pa.ChunkedArray @classmethod From e09ac0a3d25fcfab80f6e8ba61b510938599a3c3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 16 Jun 2022 15:16:23 -0700 Subject: [PATCH 03/11] Pyarrow compat --- pandas/tests/extension/test_arrow.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index c7691a438c372..2c3633abb6127 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -202,6 +202,20 @@ class TestBaseIndex(base.BaseIndexTests): class TestBaseInterface(base.BaseInterfaceTests): + def test_contains(self, data, data_missing, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + unit = getattr(data.dtype.pyarrow_dtype, "unit", None) + if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"Not supported by pyarrow < 2.0 " + f"with timestamp type {tz} and {unit}" + ) + ) + ) + super().test_contains(data, data_missing) + @pytest.mark.xfail(reason="pyarrow.ChunkedArray does not support views.") def test_view(self, data): super().test_view(data) From 2c06f2641a92f1f82a213542251da37ae99d5a33 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 21 Jun 2022 16:48:20 -0700 Subject: [PATCH 04/11] Add BaseMissingTests too --- pandas/core/arrays/arrow/array.py | 2 +- pandas/tests/extension/test_arrow.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index bfd615ff782f9..b2a8ec6bf62e8 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -619,7 +619,7 @@ def _replace_with_indices( # fast path for a contiguous set of indices arrays = [ chunk[:start], - pa.array(value, type=chunk.type), + pa.array(value, type=chunk.type, from_pandas=True), chunk[stop + 1 :], ] arrays = [arr for arr in arrays if len(arr)] diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 5598bccfd2ba0..b701f5aae9580 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -93,6 +93,18 @@ def data_missing(data): return type(data)._from_sequence([None, data[0]]) +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture returning 'data' or 'data_missing' integer arrays. + + Used to test dtype conversion with and without missing values. + """ + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + @pytest.fixture def na_value(): """The scalar missing value for this type. Default 'None'""" @@ -291,6 +303,10 @@ def test_view(self, data): super().test_view(data) +class TestBaseMissing(base.BaseMissingTests): + pass + + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]") From 607baedd33ab4c0bb4890f58028772eedb67a4a7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 21 Jun 2022 16:55:12 -0700 Subject: [PATCH 05/11] Add setitem tests too --- pandas/tests/extension/test_arrow.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index b701f5aae9580..9eeaf39959f29 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -307,6 +307,12 @@ class TestBaseMissing(base.BaseMissingTests): pass +class TestBaseSetitemTests(base.BaseSetitemTests): + @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") + def test_setitem_preserves_views(self, data): + super().test_setitem_preserves_views(data) + + def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]") From 31f9345468cd7d98280a3b4f3f1c14aca86e17dc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 22 Jun 2022 10:44:29 -0700 Subject: [PATCH 06/11] Timezone types with min pyarrow version and setitem tests not supported --- pandas/tests/extension/test_arrow.py | 229 +++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 9eeaf39959f29..a2cfc6cca5e4f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -18,6 +18,7 @@ timedelta, ) +import numpy as np import pytest from pandas.compat import ( @@ -308,6 +309,234 @@ class TestBaseMissing(base.BaseMissingTests): class TestBaseSetitemTests(base.BaseSetitemTests): + def test_setitem_scalar_series(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_scalar_series(data, box_in_series) + + def test_setitem_sequence(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_sequence(data, box_in_series) + + def test_setitem_sequence_broadcasts(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_sequence_broadcasts(data, box_in_series) + + @pytest.mark.parametrize("setter", ["loc", "iloc"]) + def test_setitem_scalar(self, data, setter, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_scalar(data, setter) + + def test_setitem_loc_scalar_mixed(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_loc_scalar_mixed(data) + + def test_setitem_loc_scalar_single(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_loc_scalar_single(data) + + def test_setitem_loc_scalar_multiple_homogoneous(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_loc_scalar_multiple_homogoneous(data) + + def test_setitem_iloc_scalar_mixed(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_iloc_scalar_mixed(data) + + def test_setitem_iloc_scalar_single(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_iloc_scalar_single(data) + + def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_iloc_scalar_multiple_homogoneous(data) + + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array", "boolean-array-na"], + ) + def test_setitem_mask(self, data, mask, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask(data, mask, box_in_series) + + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask_boolean_array_with_na(data, box_in_series) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_integer_array(data, idx, box_in_series) + + @pytest.mark.parametrize("as_callable", [True, False]) + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_aligned(self, data, as_callable, setter, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask_aligned(data, as_callable, setter) + + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_broadcast(self, data, setter, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_mask_broadcast(data, setter) + + def test_setitem_tuple_index(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_tuple_index(data) + + def test_setitem_slice(self, data, box_in_series, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_slice(data, box_in_series) + + def test_setitem_loc_iloc_slice(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_loc_iloc_slice(data) + + def test_setitem_slice_array(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_slice_array(data) + + def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_with_expansion_dataframe_column(data, full_indexer) + + def test_setitem_frame_2d_values(self, data, request): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + super().test_setitem_frame_2d_values(data) + @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") def test_setitem_preserves_views(self, data): super().test_setitem_preserves_views(data) From 541cdf72dd8171b4a03a67520050b4d721d56b89 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 22 Jun 2022 11:11:44 -0700 Subject: [PATCH 07/11] xfail for duration type with arraymanager --- pandas/tests/extension/test_arrow.py | 258 +++++++++++++++++++++++++-- 1 file changed, 239 insertions(+), 19 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index a2cfc6cca5e4f..98297ce44e00a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -305,7 +305,62 @@ def test_view(self, data): class TestBaseMissing(base.BaseMissingTests): - pass + def test_fillna_limit_pad(self, data_missing, using_array_manager, request): + if using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_fillna_limit_pad(data_missing) + + def test_fillna_limit_backfill(self, data_missing, using_array_manager, request): + if using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_fillna_limit_backfill(data_missing) + + def test_fillna_series(self, data_missing, using_array_manager, request): + if using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_fillna_series(data_missing) + + def test_fillna_series_method( + self, data_missing, fillna_method, using_array_manager, request + ): + if using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_fillna_series_method(data_missing, fillna_method) + + def test_fillna_frame(self, data_missing, using_array_manager, request): + if using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_fillna_frame(data_missing) class TestBaseSetitemTests(base.BaseSetitemTests): @@ -319,7 +374,7 @@ def test_setitem_scalar_series(self, data, box_in_series, request): ) super().test_setitem_scalar_series(data, box_in_series) - def test_setitem_sequence(self, data, box_in_series, request): + def test_setitem_sequence(self, data, box_in_series, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -327,9 +382,32 @@ def test_setitem_sequence(self, data, box_in_series, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_sequence(data, box_in_series) - def test_setitem_sequence_broadcasts(self, data, box_in_series, request): + def test_setitem_empty_indexer( + self, data, box_in_series, using_array_manager, request + ): + if using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_setitem_empty_indexer(data, box_in_series) + + def test_setitem_sequence_broadcasts( + self, data, box_in_series, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -337,10 +415,18 @@ def test_setitem_sequence_broadcasts(self, data, box_in_series, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_sequence_broadcasts(data, box_in_series) @pytest.mark.parametrize("setter", ["loc", "iloc"]) - def test_setitem_scalar(self, data, setter, request): + def test_setitem_scalar(self, data, setter, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -348,9 +434,17 @@ def test_setitem_scalar(self, data, setter, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_scalar(data, setter) - def test_setitem_loc_scalar_mixed(self, data, request): + def test_setitem_loc_scalar_mixed(self, data, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -358,9 +452,17 @@ def test_setitem_loc_scalar_mixed(self, data, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_loc_scalar_mixed(data) - def test_setitem_loc_scalar_single(self, data, request): + def test_setitem_loc_scalar_single(self, data, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -368,9 +470,19 @@ def test_setitem_loc_scalar_single(self, data, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_loc_scalar_single(data) - def test_setitem_loc_scalar_multiple_homogoneous(self, data, request): + def test_setitem_loc_scalar_multiple_homogoneous( + self, data, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -378,9 +490,17 @@ def test_setitem_loc_scalar_multiple_homogoneous(self, data, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_loc_scalar_multiple_homogoneous(data) - def test_setitem_iloc_scalar_mixed(self, data, request): + def test_setitem_iloc_scalar_mixed(self, data, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -390,7 +510,7 @@ def test_setitem_iloc_scalar_mixed(self, data, request): ) super().test_setitem_iloc_scalar_mixed(data) - def test_setitem_iloc_scalar_single(self, data, request): + def test_setitem_iloc_scalar_single(self, data, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -398,9 +518,19 @@ def test_setitem_iloc_scalar_single(self, data, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_iloc_scalar_single(data) - def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): + def test_setitem_iloc_scalar_multiple_homogoneous( + self, data, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -408,6 +538,14 @@ def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_iloc_scalar_multiple_homogoneous(data) @pytest.mark.parametrize( @@ -419,7 +557,9 @@ def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): ], ids=["numpy-array", "boolean-array", "boolean-array-na"], ) - def test_setitem_mask(self, data, mask, box_in_series, request): + def test_setitem_mask( + self, data, mask, box_in_series, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -427,9 +567,19 @@ def test_setitem_mask(self, data, mask, box_in_series, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_mask(data, mask, box_in_series) - def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): + def test_setitem_mask_boolean_array_with_na( + self, data, box_in_series, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -437,6 +587,14 @@ def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_mask_boolean_array_with_na(data, box_in_series) @pytest.mark.parametrize( @@ -444,7 +602,9 @@ def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], ids=["list", "integer-array", "numpy-array"], ) - def test_setitem_integer_array(self, data, idx, box_in_series, request): + def test_setitem_integer_array( + self, data, idx, box_in_series, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -452,11 +612,21 @@ def test_setitem_integer_array(self, data, idx, box_in_series, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_integer_array(data, idx, box_in_series) @pytest.mark.parametrize("as_callable", [True, False]) @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_aligned(self, data, as_callable, setter, request): + def test_setitem_mask_aligned( + self, data, as_callable, setter, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -464,10 +634,18 @@ def test_setitem_mask_aligned(self, data, as_callable, setter, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_mask_aligned(data, as_callable, setter) @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_broadcast(self, data, setter, request): + def test_setitem_mask_broadcast(self, data, setter, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -475,6 +653,14 @@ def test_setitem_mask_broadcast(self, data, setter, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_mask_broadcast(data, setter) def test_setitem_tuple_index(self, data, request): @@ -487,7 +673,7 @@ def test_setitem_tuple_index(self, data, request): ) super().test_setitem_tuple_index(data) - def test_setitem_slice(self, data, box_in_series, request): + def test_setitem_slice(self, data, box_in_series, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -495,9 +681,17 @@ def test_setitem_slice(self, data, box_in_series, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_slice(data, box_in_series) - def test_setitem_loc_iloc_slice(self, data, request): + def test_setitem_loc_iloc_slice(self, data, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -505,6 +699,14 @@ def test_setitem_loc_iloc_slice(self, data, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_loc_iloc_slice(data) def test_setitem_slice_array(self, data, request): @@ -517,7 +719,9 @@ def test_setitem_slice_array(self, data, request): ) super().test_setitem_slice_array(data) - def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request): + def test_setitem_with_expansion_dataframe_column( + self, data, full_indexer, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -525,9 +729,17 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, reque reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_with_expansion_dataframe_column(data, full_indexer) - def test_setitem_frame_2d_values(self, data, request): + def test_setitem_frame_2d_values(self, data, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -535,6 +747,14 @@ def test_setitem_frame_2d_values(self, data, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration( + data_missing.dtype.pyarrow_dtype + ): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_frame_2d_values(data) @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") From ba0641708384db53700cba959bbac30eb775ab14 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 22 Jun 2022 14:13:49 -0700 Subject: [PATCH 08/11] Fix some typos --- pandas/tests/extension/test_arrow.py | 83 +++++++--------------------- 1 file changed, 21 insertions(+), 62 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 98297ce44e00a..495f08254a5a1 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -382,9 +382,7 @@ def test_setitem_sequence(self, data, box_in_series, using_array_manager, reques reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -395,9 +393,7 @@ def test_setitem_sequence(self, data, box_in_series, using_array_manager, reques def test_setitem_empty_indexer( self, data, box_in_series, using_array_manager, request ): - if using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + if using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -415,9 +411,7 @@ def test_setitem_sequence_broadcasts( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -434,9 +428,7 @@ def test_setitem_scalar(self, data, setter, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -452,9 +444,7 @@ def test_setitem_loc_scalar_mixed(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -470,9 +460,7 @@ def test_setitem_loc_scalar_single(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -490,9 +478,7 @@ def test_setitem_loc_scalar_multiple_homogoneous( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -518,9 +504,7 @@ def test_setitem_iloc_scalar_single(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -538,9 +522,7 @@ def test_setitem_iloc_scalar_multiple_homogoneous( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -567,9 +549,7 @@ def test_setitem_mask( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -580,16 +560,7 @@ def test_setitem_mask( def test_setitem_mask_boolean_array_with_na( self, data, box_in_series, using_array_manager, request ): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + if using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -612,9 +583,7 @@ def test_setitem_integer_array( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -634,9 +603,7 @@ def test_setitem_mask_aligned( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -653,9 +620,7 @@ def test_setitem_mask_broadcast(self, data, setter, using_array_manager, request reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -681,9 +646,7 @@ def test_setitem_slice(self, data, box_in_series, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -699,9 +662,7 @@ def test_setitem_loc_iloc_slice(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -722,16 +683,16 @@ def test_setitem_slice_array(self, data, request): def test_setitem_with_expansion_dataframe_column( self, data, full_indexer, using_array_manager, request ): + # Is there a way to get the full_indexer id "null_slice"? + is_null_slice = full_indexer(pd.Series(dtype=object)) == slice(None) tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): + if pa_version_under2p0 and tz not in (None, "UTC") and not is_null_slice: request.node.add_marker( pytest.mark.xfail( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -747,9 +708,7 @@ def test_setitem_frame_2d_values(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration( - data_missing.dtype.pyarrow_dtype - ): + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" From 0322c8a35985c47c24e901f0eb1956fb7b1c5e2b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 22 Jun 2022 15:53:00 -0700 Subject: [PATCH 09/11] Fix more min version tests --- pandas/tests/extension/base/setitem.py | 14 ++++++++++++ pandas/tests/extension/test_arrow.py | 30 ++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 9e016e0101ef6..04fa3c11a6c40 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -357,6 +357,20 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): self.assert_frame_equal(result, expected) + def test_setitem_with_expansion_row(self, data, na_value): + df = pd.DataFrame({"data": data[:1]}) + + df.loc[1, "data"] = data[1] + expected = pd.DataFrame({"data": data[:2]}) + self.assert_frame_equal(df, expected) + + # https://github.com/pandas-dev/pandas/issues/47284 + df.loc[2, "data"] = na_value + expected = pd.DataFrame( + {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)} + ) + self.assert_frame_equal(df, expected) + def test_setitem_series(self, data, full_indexer): # https://github.com/pandas-dev/pandas/issues/32395 ser = pd.Series(data, name="data") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 495f08254a5a1..e716017b860ed 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -363,7 +363,7 @@ def test_fillna_frame(self, data_missing, using_array_manager, request): super().test_fillna_frame(data_missing) -class TestBaseSetitemTests(base.BaseSetitemTests): +class TestBaseSetitem(base.BaseSetitemTests): def test_setitem_scalar_series(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): @@ -560,7 +560,15 @@ def test_setitem_mask( def test_setitem_mask_boolean_array_with_na( self, data, box_in_series, using_array_manager, request ): - if using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + unit = getattr(data.dtype.pyarrow_dtype, "unit", None) + if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -700,6 +708,24 @@ def test_setitem_with_expansion_dataframe_column( ) super().test_setitem_with_expansion_dataframe_column(data, full_indexer) + def test_setitem_with_expansion_row( + self, data, na_value, using_array_manager, request + ): + tz = getattr(data.dtype.pyarrow_dtype, "tz", None) + if pa_version_under2p0 and tz not in (None, "UTC"): + request.node.add_marker( + pytest.mark.xfail( + reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") + ) + ) + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_setitem_with_expansion_row(data, na_value) + def test_setitem_frame_2d_values(self, data, using_array_manager, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): From b983c494be2bdf5906cd040809b713afd24a903e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 22 Jun 2022 21:15:33 -0700 Subject: [PATCH 10/11] address more datamanager tests --- pandas/tests/extension/test_arrow.py | 64 ++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index e716017b860ed..eadf917bb44f3 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -364,7 +364,9 @@ def test_fillna_frame(self, data_missing, using_array_manager, request): class TestBaseSetitem(base.BaseSetitemTests): - def test_setitem_scalar_series(self, data, box_in_series, request): + def test_setitem_scalar_series( + self, data, box_in_series, using_array_manager, request + ): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -372,6 +374,12 @@ def test_setitem_scalar_series(self, data, box_in_series, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_scalar_series(data, box_in_series) def test_setitem_sequence(self, data, box_in_series, using_array_manager, request): @@ -382,7 +390,11 @@ def test_setitem_sequence(self, data, box_in_series, using_array_manager, reques reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + elif ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and box_in_series + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -393,7 +405,11 @@ def test_setitem_sequence(self, data, box_in_series, using_array_manager, reques def test_setitem_empty_indexer( self, data, box_in_series, using_array_manager, request ): - if using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + if ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and box_in_series + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -411,7 +427,11 @@ def test_setitem_sequence_broadcasts( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + elif ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and box_in_series + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -494,6 +514,12 @@ def test_setitem_iloc_scalar_mixed(self, data, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) + elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) super().test_setitem_iloc_scalar_mixed(data) def test_setitem_iloc_scalar_single(self, data, using_array_manager, request): @@ -549,7 +575,11 @@ def test_setitem_mask( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + elif ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and box_in_series + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -568,7 +598,11 @@ def test_setitem_mask_boolean_array_with_na( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + elif ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and box_in_series + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -591,7 +625,11 @@ def test_setitem_integer_array( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + elif ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and box_in_series + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -654,7 +692,11 @@ def test_setitem_slice(self, data, box_in_series, using_array_manager, request): reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + elif ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and box_in_series + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" @@ -700,7 +742,11 @@ def test_setitem_with_expansion_dataframe_column( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + elif ( + using_array_manager + and pa.types.is_duration(data.dtype.pyarrow_dtype) + and not is_null_slice + ): request.node.add_marker( pytest.mark.xfail( reason="Checking ndim when using arraymanager with duration type" From 5f6fd17dfbf13518a0c883849e0dfb5a46e4f3ea Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 23 Jun 2022 16:15:59 -0700 Subject: [PATCH 11/11] More datamanager tests --- pandas/tests/extension/test_arrow.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index eadf917bb44f3..06ce6901effc7 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -364,9 +364,7 @@ def test_fillna_frame(self, data_missing, using_array_manager, request): class TestBaseSetitem(base.BaseSetitemTests): - def test_setitem_scalar_series( - self, data, box_in_series, using_array_manager, request - ): + def test_setitem_scalar_series(self, data, box_in_series, request): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) if pa_version_under2p0 and tz not in (None, "UTC"): request.node.add_marker( @@ -374,12 +372,6 @@ def test_setitem_scalar_series( reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") ) ) - elif using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="Checking ndim when using arraymanager with duration type" - ) - ) super().test_setitem_scalar_series(data, box_in_series) def test_setitem_sequence(self, data, box_in_series, using_array_manager, request): @@ -402,6 +394,17 @@ def test_setitem_sequence(self, data, box_in_series, using_array_manager, reques ) super().test_setitem_sequence(data, box_in_series) + def test_setitem_sequence_mismatched_length_raises( + self, data, as_array, using_array_manager, request + ): + if using_array_manager and pa.types.is_duration(data.dtype.pyarrow_dtype): + request.node.add_marker( + pytest.mark.xfail( + reason="Checking ndim when using arraymanager with duration type" + ) + ) + super().test_setitem_sequence_mismatched_length_raises(data, as_array) + def test_setitem_empty_indexer( self, data, box_in_series, using_array_manager, request ):