From 5013d0741bd11152fe44679673d04e5676d80ccb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 15 Nov 2024 16:53:15 +0100 Subject: [PATCH 1/2] Backport PR #60312 on branch 2.3.x (TST (string dtype): resolve xfails in pandas/tests/apply + raise TypeError for ArrowArray accumulate) (cherry picked from commit fba5f08f048215a6e0a578f8bad7b7f2c9ee8eef) --- pandas/core/arrays/arrow/array.py | 6 +++++- pandas/tests/apply/test_invalid_arg.py | 30 +++++++++----------------- pandas/tests/apply/test_str.py | 13 ++++++----- pandas/tests/extension/test_arrow.py | 2 +- 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 13e10c8d3a738..0c1e1d0c63c85 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1633,7 +1633,11 @@ def _accumulate( else: data_to_accum = data_to_accum.cast(pa.int64()) - result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs) + try: + result = pyarrow_meth(data_to_accum, skip_nulls=skipna, **kwargs) + except pa.ArrowNotImplementedError as err: + msg = f"operation '{name}' not supported for dtype '{self.dtype}'" + raise TypeError(msg) from err if convert_to_int: result = result.cast(pa_dtype) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 8963265b0a800..1b3db0e5b857f 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -218,18 +218,12 @@ def transform(row): def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string): # GH 21224 if using_infer_string: - if df.dtypes.iloc[0].storage == "pyarrow": - import pyarrow as pa - - # TODO(infer_string) - # should raise a proper TypeError instead of propagating the pyarrow error - - expected = (expected, pa.lib.ArrowNotImplementedError) - else: - expected = (expected, NotImplementedError) + expected = (expected, NotImplementedError) msg = ( - "can't multiply sequence by non-int of type 'str'|has no kernel|cannot perform" + "can't multiply sequence by non-int of type 'str'" + "|cannot perform cumprod with type str" # NotImplementedError python backend + "|operation 'cumprod' not supported for dtype 'str'" # TypeError pyarrow ) warn = None if isinstance(func, str) else FutureWarning with pytest.raises(expected, match=msg): @@ -259,16 +253,12 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri if func == "median" or func is np.nanmedian or func is np.median: msg = r"Cannot convert \['a' 'b' 'c'\] to numeric" - if using_infer_string: - if series.dtype.storage == "pyarrow": - import pyarrow as pa - - # TODO(infer_string) - # should raise a proper TypeError instead of propagating the pyarrow error - expected = (expected, pa.lib.ArrowNotImplementedError) - else: - expected = (expected, NotImplementedError) - msg = msg + "|does not support|has no kernel|Cannot perform|cannot perform" + if using_infer_string and func == "cumprod": + expected = (expected, NotImplementedError) + + msg = ( + msg + "|does not support|has no kernel|Cannot perform|cannot perform|operation" + ) warn = None if isinstance(func, str) else FutureWarning with pytest.raises(expected, match=msg): diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 8956aed5e9ceb..55abac79b58f9 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.core.dtypes.common import is_number from pandas import ( @@ -88,7 +86,6 @@ def test_apply_np_transformer(float_frame, op, how): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "series, func, expected", chain( @@ -147,7 +144,6 @@ def test_agg_cython_table_series(series, func, expected): assert result == expected -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "series, func, expected", chain( @@ -170,10 +166,17 @@ def test_agg_cython_table_series(series, func, expected): ), ), ) -def test_agg_cython_table_transform_series(series, func, expected): +def test_agg_cython_table_transform_series(request, series, func, expected): # GH21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + if series.dtype == "string" and func == "cumsum": + request.applymarker( + pytest.mark.xfail( + raises=(TypeError, NotImplementedError), + reason="TODO(infer_string) cumsum not yet implemented for string", + ) + ) warn = None if isinstance(func, str) else FutureWarning with tm.assert_produces_warning(warn, match="is currently using Series.*"): result = series.agg(func) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0ce7a66e0e00c..03ab7c7f1dad8 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -436,7 +436,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques request.applymarker( pytest.mark.xfail( reason=f"{all_numeric_accumulations} not implemented for {pa_type}", - raises=NotImplementedError, + raises=TypeError, ) ) From f6e4a9b5628fdf948eafa869d824249c85dd3763 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 15 Nov 2024 13:54:53 -0500 Subject: [PATCH 2/2] 2.3 test function compat --- pandas/tests/apply/test_invalid_arg.py | 2 +- pandas/tests/apply/test_str.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 1b3db0e5b857f..68f3fe36546a0 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -253,7 +253,7 @@ def test_agg_cython_table_raises_series(series, func, expected, using_infer_stri if func == "median" or func is np.nanmedian or func is np.median: msg = r"Cannot convert \['a' 'b' 'c'\] to numeric" - if using_infer_string and func == "cumprod": + if using_infer_string and func in ("cumprod", np.cumprod, np.nancumprod): expected = (expected, NotImplementedError) msg = ( diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 55abac79b58f9..f916567c6b883 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -170,7 +170,7 @@ def test_agg_cython_table_transform_series(request, series, func, expected): # GH21224 # test transforming functions in # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) - if series.dtype == "string" and func == "cumsum": + if series.dtype == "string" and func in ("cumsum", np.cumsum, np.nancumsum): request.applymarker( pytest.mark.xfail( raises=(TypeError, NotImplementedError),