From 2733d7292e2c3bb8268575ff19c7fb5b99c9a70d Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 14 Dec 2023 13:10:32 -0800 Subject: [PATCH 1/5] TST: use single-class pattern in test_string.py --- pandas/core/arrays/arrow/array.py | 3 + pandas/tests/extension/test_string.py | 85 ++++++++++++++------------- 2 files changed, 47 insertions(+), 41 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 9f0a039126d1d..a590ed5e79d08 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -620,6 +620,9 @@ def __invert__(self) -> Self: # This is a bit wise op for integer types if pa.types.is_integer(self._pa_array.type): return type(self)(pc.bit_wise_not(self._pa_array)) + elif pa.types.is_string(self._pa_array.type): + # Raise TypeError instead of pa.ArrowNotImplementedError + raise TypeError("__invert__ is not supported for string dtypes") else: return type(self)(pc.invert(self._pa_array)) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index bf6db6e9f16ec..55006ad756c3b 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -90,7 +90,7 @@ def data_for_grouping(dtype, chunked): return maybe_split_array(arr, chunked) -class TestDtype(base.BaseDtypeTests): +class TestStringArray(base.ExtensionTests): def test_eq_with_str(self, dtype): assert dtype == f"string[{dtype.storage}]" super().test_eq_with_str(dtype) @@ -100,43 +100,25 @@ def test_is_not_string_type(self, dtype): # because StringDtype is a string type assert is_string_dtype(dtype) - -class TestInterface(base.BaseInterfaceTests): def test_view(self, data, request, arrow_string_storage): if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_view(data) - -class TestConstructors(base.BaseConstructorsTests): def test_from_dtype(self, data): # base test uses string representation of dtype pass - -class TestReshaping(base.BaseReshapingTests): def test_transpose(self, data, request, arrow_string_storage): if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_transpose(data) - -class TestGetitem(base.BaseGetitemTests): - pass - - -class TestSetitem(base.BaseSetitemTests): def test_setitem_preserves_views(self, data, request, arrow_string_storage): if data.dtype.storage in arrow_string_storage: pytest.skip(reason="2D support not implemented for ArrowStringArray") super().test_setitem_preserves_views(data) - -class TestIndex(base.BaseIndexTests): - pass - - -class TestMissing(base.BaseMissingTests): def test_dropna_array(self, data_missing): result = data_missing.dropna() expected = data_missing[[1]] @@ -154,8 +136,47 @@ def test_fillna_no_op_returns_copy(self, data): assert result is not data tm.assert_extension_array_equal(result, data) + def _get_expected_exception( + self, op_name: str, obj, other + ) -> type[Exception] | None: + if op_name in ["__divmod__", "__rdivmod__"]: + if isinstance(obj, pd.Series) and tm.get_dtype(obj).storage in [ + "pyarrow", + "pyarrow_numpy", + ]: + # TODO: re-raise as TypeError? + return NotImplementedError + elif isinstance(other, pd.Series) and tm.get_dtype(other).storage in [ + "pyarrow", + "pyarrow_numpy", + ]: + # TODO: re-raise as TypeError? + return NotImplementedError + return TypeError + elif op_name in ["__mod__", "__rmod__", "__pow__", "__rpow__"]: + if tm.get_dtype(obj).storage in ["pyarrow", "pyarrow_numpy"]: + return NotImplementedError + return TypeError + elif op_name in ["__mul__", "__rmul__"]: + # Can only multiply strings by integers + return TypeError + elif op_name in [ + "__truediv__", + "__rtruediv__", + "__floordiv__", + "__rfloordiv__", + "__sub__", + "__rsub__", + ]: + if tm.get_dtype(obj).storage in ["pyarrow", "pyarrow_numpy"]: + import pyarrow as pa + + # TODO: better to re-raise as TypeError? + return pa.ArrowNotImplementedError + return TypeError + + return None -class TestReduce(base.BaseReduceTests): def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: return ( op_name in ["min", "max"] @@ -163,21 +184,13 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: and op_name in ("any", "all") ) - -class TestMethods(base.BaseMethodsTests): - pass - - -class TestCasting(base.BaseCastingTests): - pass - - -class TestComparisonOps(base.BaseComparisonOpsTests): def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): dtype = tm.get_dtype(obj) # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has no # attribute "storage" - if dtype.storage == "pyarrow": # type: ignore[union-attr] + if op_name in ["__add__", "__radd__"]: + cast_to = dtype + elif dtype.storage == "pyarrow": # type: ignore[union-attr] cast_to = "boolean[pyarrow]" elif dtype.storage == "pyarrow_numpy": # type: ignore[union-attr] cast_to = np.bool_ # type: ignore[assignment] @@ -189,16 +202,6 @@ def test_compare_scalar(self, data, comparison_op): ser = pd.Series(data) self._compare_other(ser, data, comparison_op, "abc") - -class TestParsing(base.BaseParsingTests): - pass - - -class TestPrinting(base.BasePrintingTests): - pass - - -class TestGroupBy(base.BaseGroupbyTests): @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) From 60c2fb4402df904e0938e6c79092669506b0d5de Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 14 Dec 2023 13:44:41 -0800 Subject: [PATCH 2/5] mypy fixup --- pandas/tests/extension/test_string.py | 29 ++++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 55006ad756c3b..300319a797617 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -14,6 +14,7 @@ """ import string +from typing import cast import numpy as np import pytest @@ -140,13 +141,17 @@ def _get_expected_exception( self, op_name: str, obj, other ) -> type[Exception] | None: if op_name in ["__divmod__", "__rdivmod__"]: - if isinstance(obj, pd.Series) and tm.get_dtype(obj).storage in [ + if isinstance(obj, pd.Series) and cast( + StringDtype, tm.get_dtype(obj) + ).storage in [ "pyarrow", "pyarrow_numpy", ]: # TODO: re-raise as TypeError? return NotImplementedError - elif isinstance(other, pd.Series) and tm.get_dtype(other).storage in [ + elif isinstance(other, pd.Series) and cast( + StringDtype, tm.get_dtype(other) + ).storage in [ "pyarrow", "pyarrow_numpy", ]: @@ -154,7 +159,10 @@ def _get_expected_exception( return NotImplementedError return TypeError elif op_name in ["__mod__", "__rmod__", "__pow__", "__rpow__"]: - if tm.get_dtype(obj).storage in ["pyarrow", "pyarrow_numpy"]: + if cast(StringDtype, tm.get_dtype(obj)).storage in [ + "pyarrow", + "pyarrow_numpy", + ]: return NotImplementedError return TypeError elif op_name in ["__mul__", "__rmul__"]: @@ -168,7 +176,10 @@ def _get_expected_exception( "__sub__", "__rsub__", ]: - if tm.get_dtype(obj).storage in ["pyarrow", "pyarrow_numpy"]: + if cast(StringDtype, tm.get_dtype(obj).storage) in [ + "pyarrow", + "pyarrow_numpy", + ]: import pyarrow as pa # TODO: better to re-raise as TypeError? @@ -185,14 +196,12 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: ) def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): - dtype = tm.get_dtype(obj) - # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has no - # attribute "storage" + dtype = cast(StringDtype, tm.get_dtype(obj)) if op_name in ["__add__", "__radd__"]: cast_to = dtype - elif dtype.storage == "pyarrow": # type: ignore[union-attr] - cast_to = "boolean[pyarrow]" - elif dtype.storage == "pyarrow_numpy": # type: ignore[union-attr] + elif dtype.storage == "pyarrow": + cast_to = "boolean[pyarrow]" # type: ignore[assignment] + elif dtype.storage == "pyarrow_numpy": cast_to = np.bool_ # type: ignore[assignment] else: cast_to = "boolean" From 9113f311048c0e2fcb5f4f0f84acc1d38f0d0378 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 14 Dec 2023 15:47:50 -0800 Subject: [PATCH 3/5] update pyarrow test --- pandas/tests/extension/test_arrow.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index de9f872aca01d..4835cb11db042 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -753,7 +753,11 @@ def test_EA_types(self, engine, data, dtype_backend, request): def test_invert(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if not (pa.types.is_boolean(pa_dtype) or pa.types.is_integer(pa_dtype)): + if not ( + pa.types.is_boolean(pa_dtype) + or pa.types.is_integer(pa_dtype) + or pa.types.is_string(pa_dtype) + ): request.applymarker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, From 407f7553ab9c0ef6d8acd6bf115f3aca421f5b32 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 14 Dec 2023 16:23:08 -0800 Subject: [PATCH 4/5] __future__ import --- pandas/tests/extension/test_string.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 300319a797617..b56764f60450b 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -13,6 +13,8 @@ be added to the array-specific tests in `pandas/tests/arrays/`. """ +from __future__ import annotations + import string from typing import cast From 33f0073a9138fae101c7f30fbbb44591209aaed8 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 15 Dec 2023 09:23:39 -0800 Subject: [PATCH 5/5] mypy fixup --- pandas/tests/extension/test_string.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index b56764f60450b..2d5a134f8560a 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -178,7 +178,7 @@ def _get_expected_exception( "__sub__", "__rsub__", ]: - if cast(StringDtype, tm.get_dtype(obj).storage) in [ + if cast(StringDtype, tm.get_dtype(obj)).storage in [ "pyarrow", "pyarrow_numpy", ]: @@ -206,7 +206,7 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): elif dtype.storage == "pyarrow_numpy": cast_to = np.bool_ # type: ignore[assignment] else: - cast_to = "boolean" + cast_to = "boolean" # type: ignore[assignment] return pointwise_result.astype(cast_to) def test_compare_scalar(self, data, comparison_op):