diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 766c418741ada..ffd20df85ed1f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -580,6 +580,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) +- Bug in :meth:`Series.astype` where conversion from ``string`` to ``float`` raised in the presence of ``pd.NA`` values (:issue:`37626`) - Strings diff --git a/pandas/conftest.py b/pandas/conftest.py index 77e9af67590a6..a2c137a1e1aed 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -286,7 +286,6 @@ def unique_nulls_fixture(request): # Generate cartesian product of unique_nulls_fixture: unique_nulls_fixture2 = unique_nulls_fixture - # ---------------------------------------------------------------- # Classes # ---------------------------------------------------------------- @@ -1069,6 +1068,20 @@ def float_ea_dtype(request): return request.param +@pytest.fixture(params=tm.FLOAT_DTYPES + tm.FLOAT_EA_DTYPES) +def any_float_allowed_nullable_dtype(request): + """ + Parameterized fixture for float dtypes. 
+ + * float + * 'float32' + * 'float64' + * 'Float32' + * 'Float64' + """ + return request.param + + @pytest.fixture(params=tm.COMPLEX_DTYPES) def complex_dtype(request): """ diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 3b297e7c2b13b..e0bb788d665eb 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -18,7 +18,8 @@ from pandas.core import ops from pandas.core.array_algos import masked_reductions -from pandas.core.arrays import IntegerArray, PandasArray +from pandas.core.arrays import FloatingArray, IntegerArray, PandasArray +from pandas.core.arrays.floating import FloatingDtype from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer @@ -298,6 +299,19 @@ def astype(self, dtype, copy=True): arr[mask] = 0 values = arr.astype(dtype.numpy_dtype) return IntegerArray(values, mask, copy=False) + elif isinstance(dtype, FloatingDtype): + arr = self.copy() + mask = self.isna() + arr[mask] = "0" + values = arr.astype(dtype.numpy_dtype) + return FloatingArray(values, mask, copy=False) + elif np.issubdtype(dtype, np.floating): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype) + values[mask] = np.nan + return values return super().astype(dtype, copy) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 07e9484994c26..b629e2fca2feb 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -366,6 +366,15 @@ def test_astype_int(dtype, request): tm.assert_extension_array_equal(result, expected) +def test_astype_float(any_float_allowed_nullable_dtype): + # GH 37626: string -> float cast with pd.NA. Don't compare arrays (37974) + ser = pd.Series(["1.1", pd.NA, "3.3"], dtype="string") + + result = ser.astype(any_float_allowed_nullable_dtype) + expected = pd.Series([1.1, np.nan, 3.3], 
 dtype=any_float_allowed_nullable_dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce(skipna, dtype):