From adf52b26c5d330fdfb970d5f42a81b5ab89ebf41 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Sun, 26 Nov 2023 18:24:17 +0100
Subject: [PATCH] BUG: to_numeric casting to ea for new string dtype

Skip the masked-nullable conversion in to_numeric when the input is a
StringDtype whose storage is "pyarrow_numpy", so the result is no longer
converted to an extension dtype. Other StringDtype storages, and calls
that pass dtype_backend explicitly, keep the existing behaviour.

Both conditions rely on "and" binding tighter than "or":
    A or (B and storage != "pyarrow_numpy")
    (A and new_mask is None) or (B and storage != "pyarrow_numpy")

test_series is now also run with the "future.infer_string" option enabled
(skipped when pyarrow is not installed).
---
 doc/source/whatsnew/v2.1.4.rst        |  2 +-
 pandas/core/tools/numeric.py          |  4 +++-
 pandas/tests/tools/test_to_numeric.py | 13 ++++++++++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.4.rst b/doc/source/whatsnew/v2.1.4.rst
index 543a9864ced26..d3db268ad6cf8 100644
--- a/doc/source/whatsnew/v2.1.4.rst
+++ b/doc/source/whatsnew/v2.1.4.rst
@@ -23,9 +23,9 @@ Bug fixes
 ~~~~~~~~~
 - Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`)
 - Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
+- Fixed bug in :func:`to_numeric` converting to extension dtype for ``string[pyarrow_numpy]`` dtype (:issue:`56179`)
 - Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
 - Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_214.other:
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index c5a2736d4f926..09652a7d8bc92 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -234,7 +234,8 @@ def to_numeric(
                 set(),
                 coerce_numeric=coerce_numeric,
                 convert_to_masked_nullable=dtype_backend is not lib.no_default
-                or isinstance(values_dtype, StringDtype),
+                or isinstance(values_dtype, StringDtype)
+                and values_dtype.storage != "pyarrow_numpy",
             )
         except (ValueError, TypeError):
             if errors == "raise":
@@ -249,6 +250,7 @@ def to_numeric(
         dtype_backend is not lib.no_default
         and new_mask is None
         or isinstance(values_dtype, StringDtype)
+        and values_dtype.storage != "pyarrow_numpy"
     ):
         new_mask = np.zeros(values.shape, dtype=np.bool_)
 
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index d6b085b7954db..c452382ec572b 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -4,12 +4,15 @@
 from numpy import iinfo
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     ArrowDtype,
     DataFrame,
     Index,
     Series,
+    option_context,
     to_numeric,
 )
 import pandas._testing as tm
@@ -67,10 +70,14 @@ def test_empty(input_kwargs, result_kwargs):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
+)
 @pytest.mark.parametrize("last_val", ["7", 7])
-def test_series(last_val):
-    ser = Series(["1", "-3.14", last_val])
-    result = to_numeric(ser)
+def test_series(last_val, infer_string):
+    with option_context("future.infer_string", infer_string):
+        ser = Series(["1", "-3.14", last_val])
+        result = to_numeric(ser)
 
     expected = Series([1, -3.14, 7])
     tm.assert_series_equal(result, expected)