diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index a98c162c37d74..13f06f92aa4ff 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -31,6 +31,7 @@ Bug fixes - Bug in :meth:`ArrowDtype.__from_arrow__` not respecting if dtype is explicitly given (:issue:`52533`) - Bug in :func:`read_csv` casting PyArrow datetimes to NumPy when ``dtype_backend="pyarrow"`` and ``parse_dates`` is set causing a performance bottleneck in the process (:issue:`52546`) - Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`) +- Bug in :func:`to_numeric` with ``errors='coerce'`` and ``dtype_backend='pyarrow'`` with :class:`ArrowDtype` data (:issue:`52588`) .. --------------------------------------------------------------------------- .. _whatsnew_201.other: diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 04443f89ddf6f..a06152272797f 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -275,7 +275,8 @@ def to_numeric( # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct # masked array if (mask is not None or new_mask is not None) and not is_string_dtype(values.dtype): - if mask is None: + if mask is None or (new_mask is not None and new_mask.shape == mask.shape): + # GH 52588 mask = new_mask else: mask = mask.copy() diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index fe6794b120681..499bcae5e90f0 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -6,6 +6,7 @@ import pandas as pd from pandas import ( + ArrowDtype, DataFrame, Index, Series, @@ -942,3 +943,12 @@ def test_invalid_dtype_backend(): ) with pytest.raises(ValueError, match=msg): to_numeric(ser, dtype_backend="numpy") + + +def test_coerce_pyarrow_backend(): + # GH 52588 + pa = pytest.importorskip("pyarrow") + ser = Series(list("12x"), dtype=ArrowDtype(pa.string())) + result = to_numeric(ser, errors="coerce", dtype_backend="pyarrow") + expected = Series([1, 2, None], dtype=ArrowDtype(pa.int64())) + tm.assert_series_equal(result, expected)