From 3a3e69acdc24d5521d80fbc76b9041956b7357b7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Nov 2024 12:15:26 +0100 Subject: [PATCH 1/2] TST (string dtype): resolve xfails in pandas/tests/apply + raise TypeError for ArrowArray accumulate --- pandas/core/arrays/arrow/array.py | 6 +++++- pandas/tests/apply/test_invalid_arg.py | 30 +++++++++----------------- pandas/tests/apply/test_str.py | 13 ++++++----- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index fcc50c5b6b20f..e0c93db0afb07 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1644,7 +1644,11 @@ def _accumulate( else: data_to_accum = data_to_accum.cast(pa.int64()) - result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs) + try: + result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs) + except pa.ArrowNotImplementedError as err: + msg = f"operation '{name}' not supported for dtype '{self.dtype}'" + raise TypeError(msg) from err if convert_to_int: result = result.cast(pa_dtype) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index e19c21f81b3e1..0503bf9166ec7 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -218,18 +218,12 @@ def transform(row): def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string): # GH 21224 if using_infer_string: - if df.dtypes.iloc[0].storage == "pyarrow": - import pyarrow as pa - - # TODO(infer_string) - # should raise a proper TypeError instead of propagating the pyarrow error - - expected = (expected, pa.lib.ArrowNotImplementedError) - else: - expected = (expected, NotImplementedError) + expected = (expected, NotImplementedError) msg = ( - "can't multiply sequence by non-int of type 'str'|has no kernel|cannot perform" + "can't multiply sequence by non-int of type 'str'" + "|cannot perform cumprod with type str" # NotImplementedError python backend + "|operation 'cumprod' not supported for dtype 'str'" # TypeError pyarrow ) warn = None if isinstance(func, str) else FutureWarning with pytest.raises(expected, match=msg): @@ -259,16 +253,12 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri if func == "median" or func is np.nanmedian or func is np.median: msg = r"Cannot convert \['a' 'b' 'c'\] to numeric" - if using_infer_string: - if series.dtype.storage == "pyarrow": - import pyarrow as pa - - # TODO(infer_string) - # should raise a proper TypeError instead of propagating the pyarrow error - expected = (expected, pa.lib.ArrowNotImplementedError) - else: - expected = (expected, NotImplementedError) - msg = msg + "|does not support|has no kernel|Cannot perform|cannot perform" + if using_infer_string and func == "cumprod": + expected = (expected, NotImplementedError) + + msg = ( + msg + "|does not support|has no kernel|Cannot perform|cannot perform|operation" + ) warn = None if isinstance(func, str) else FutureWarning with pytest.raises(expected, match=msg): diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 732652f24e2eb..c52168ae48ca8 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import WASM from pandas.core.dtypes.common import is_number @@ -81,7 +79,6 @@ def test_apply_np_transformer(float_frame, op, how): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "series, func, expected", chain( @@ -140,7 +137,6 @@ def test_agg_cython_table_series(series, func, expected): assert result == expected -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "series, func, expected", chain( @@ -163,10 +159,17 @@ def test_agg_cython_table_series(series, func, expected): ), ), ) -def test_agg_cython_table_transform_series(series, func, expected): +def test_agg_cython_table_transform_series(request, series, func, expected): # GH21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + if series.dtype == "string" and func == "cumsum": + request.applymarker( + pytest.mark.xfail( + raises=(TypeError, NotImplementedError), + reason="TODO(infer_string) cumsum not yet implemented for string", + ) + ) warn = None if isinstance(func, str) else FutureWarning with tm.assert_produces_warning(warn, match="is currently using Series.*"): result = series.agg(func) From ec1157ab6a623be2099fb7625a3926d3e420f3b7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Nov 2024 13:32:26 +0100 Subject: [PATCH 2/2] fix arrow test --- pandas/tests/extension/test_arrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index f0ff11e5fa3f7..9defb97394635 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -441,7 +441,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques request.applymarker( pytest.mark.xfail( reason=f"{all_numeric_accumulations} not implemented for {pa_type}", - raises=NotImplementedError, + raises=TypeError, ) )