diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index bbecf3fee01f3..0ceda331de790 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -61,6 +61,7 @@ to select the nullable dtypes implementation. * :func:`read_parquet` * :func:`read_orc` * :func:`read_feather` +* :func:`to_numeric` And the following methods will also utilize the ``mode.dtype_backend`` option. diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index a8ae8c47b0d19..64bb34241d956 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -4,6 +4,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import lib from pandas._typing import ( DateTimeErrorChoices, @@ -190,6 +192,9 @@ def to_numeric( values = values._data[~mask] values_dtype = getattr(values, "dtype", None) + if isinstance(values_dtype, pd.ArrowDtype): + mask = values.isna() + values = values.dropna().to_numpy() new_mask: np.ndarray | None = None if is_numeric_dtype(values_dtype): pass @@ -258,6 +263,7 @@ def to_numeric( data[~mask] = values from pandas.core.arrays import ( + ArrowExtensionArray, BooleanArray, FloatingArray, IntegerArray, @@ -272,6 +278,11 @@ def to_numeric( klass = FloatingArray values = klass(data, mask) + if get_option("mode.dtype_backend") == "pyarrow" or isinstance( + values_dtype, pd.ArrowDtype + ): + values = ArrowExtensionArray(values.__arrow_array__()) + if is_series: return arg._constructor(values, index=arg.index, name=arg.name) elif is_index: diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index d3701c30aa50c..a2b94883d457d 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -9,6 +9,7 @@ DataFrame, Index, Series, + option_context, to_numeric, ) import pandas._testing as tm @@ -813,39 +814,86 @@ def test_to_numeric_use_nullable_dtypes(val, dtype): @pytest.mark.parametrize( - "val, dtype", [(1, "Int64"), (1.5, "Float64"), (True, "boolean")] + "val, dtype", + [ + (1, "Int64"), + (1.5, "Float64"), + (True, "boolean"), + (1, "int64[pyarrow]"), + (1.5, "float64[pyarrow]"), + (True, "bool[pyarrow]"), + ], ) def test_to_numeric_use_nullable_dtypes_na(val, dtype): # GH#50505 + if "pyarrow" in dtype: + pytest.importorskip("pyarrow") + dtype_backend = "pyarrow" + else: + dtype_backend = "pandas" ser = Series([val, None], dtype=object) - result = to_numeric(ser, use_nullable_dtypes=True) + with option_context("mode.dtype_backend", dtype_backend): + result = to_numeric(ser, use_nullable_dtypes=True) expected = Series([val, pd.NA], dtype=dtype) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "val, dtype, downcast", - [(1, "Int8", "integer"), (1.5, "Float32", "float"), (1, "Int8", "signed")], + [ + (1, "Int8", "integer"), + (1.5, "Float32", "float"), + (1, "Int8", "signed"), + (1, "int8[pyarrow]", "integer"), + (1.5, "float[pyarrow]", "float"), + (1, "int8[pyarrow]", "signed"), + ], ) def test_to_numeric_use_nullable_dtypes_downcasting(val, dtype, downcast): # GH#50505 + if "pyarrow" in dtype: + pytest.importorskip("pyarrow") + dtype_backend = "pyarrow" + else: + dtype_backend = "pandas" ser = Series([val, None], dtype=object) - result = to_numeric(ser, use_nullable_dtypes=True, downcast=downcast) + with option_context("mode.dtype_backend", dtype_backend): + result = to_numeric(ser, use_nullable_dtypes=True, downcast=downcast) expected = Series([val, pd.NA], dtype=dtype) tm.assert_series_equal(result, expected) -def test_to_numeric_use_nullable_dtypes_downcasting_uint(): +@pytest.mark.parametrize( + "smaller, dtype_backend", [["UInt8", "pandas"], ["uint8[pyarrow]", "pyarrow"]] +) +def test_to_numeric_use_nullable_dtypes_downcasting_uint(smaller, dtype_backend): # GH#50505 + if dtype_backend == "pyarrow": + pytest.importorskip("pyarrow") ser = Series([1, pd.NA], dtype="UInt64") - result = to_numeric(ser, use_nullable_dtypes=True, downcast="unsigned") - expected = Series([1, pd.NA], dtype="UInt8") + with option_context("mode.dtype_backend", dtype_backend): + result = to_numeric(ser, use_nullable_dtypes=True, downcast="unsigned") + expected = Series([1, pd.NA], dtype=smaller) tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("dtype", ["Int64", "UInt64", "Float64", "boolean"]) +@pytest.mark.parametrize( + "dtype", + [ + "Int64", + "UInt64", + "Float64", + "boolean", + "int64[pyarrow]", + "uint64[pyarrow]", + "float64[pyarrow]", + "bool[pyarrow]", + ], +) def test_to_numeric_use_nullable_dtypes_already_nullable(dtype): # GH#50505 + if "pyarrow" in dtype: + pytest.importorskip("pyarrow") ser = Series([1, pd.NA], dtype=dtype) result = to_numeric(ser, use_nullable_dtypes=True) expected = Series([1, pd.NA], dtype=dtype) @@ -855,16 +903,30 @@ def test_to_numeric_use_nullable_dtypes_already_nullable(dtype): @pytest.mark.parametrize( "use_nullable_dtypes, dtype", [(True, "Float64"), (False, "float64")] ) -def test_to_numeric_use_nullable_dtypes_error(use_nullable_dtypes, dtype): +@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) +def test_to_numeric_use_nullable_dtypes_error( + use_nullable_dtypes, dtype, dtype_backend +): # GH#50505 + if dtype_backend == "pyarrow": + pytest.importorskip("pyarrow") ser = Series(["a", "b", ""]) expected = ser.copy() with pytest.raises(ValueError, match="Unable to parse string"): - to_numeric(ser, use_nullable_dtypes=use_nullable_dtypes) + with option_context("mode.dtype_backend", dtype_backend): + to_numeric(ser, use_nullable_dtypes=use_nullable_dtypes) - result = to_numeric(ser, use_nullable_dtypes=use_nullable_dtypes, errors="ignore") + with option_context("mode.dtype_backend", dtype_backend): + result = to_numeric( + ser, use_nullable_dtypes=use_nullable_dtypes, errors="ignore" + ) tm.assert_series_equal(result, expected) - result = to_numeric(ser, use_nullable_dtypes=use_nullable_dtypes, errors="coerce") + with option_context("mode.dtype_backend", dtype_backend): + result = to_numeric( + ser, use_nullable_dtypes=use_nullable_dtypes, errors="coerce" + ) + if use_nullable_dtypes and dtype_backend == "pyarrow": + dtype = "double[pyarrow]" expected = Series([np.nan, np.nan, np.nan], dtype=dtype) tm.assert_series_equal(result, expected)