Skip to content

Commit fb554ce

Browse files
authored
Backport PR pandas-dev#52174 on branch 2.0.x (BUG: to_numeric converting StringArray to object or float64) (pandas-dev#52193)
1 parent 2e74ace commit fb554ce

File tree

3 files changed

+43
-12
lines changed

3 files changed

+43
-12
lines changed

pandas/_libs/lib.pyx

+6 -2
Original file line number | Diff line number | Diff line change
@@ -2321,10 +2321,14 @@ def maybe_convert_numeric(
23212321
if not seen.coerce_numeric:
23222322
raise type(err)(f"{err} at position {i}")
23232323

2324-
seen.saw_null()
2325-
floats[i] = NaN
23262324
mask[i] = 1
23272325

2326+
if allow_null_in_int:
2327+
seen.null_ = True
2328+
else:
2329+
seen.saw_null()
2330+
floats[i] = NaN
2331+
23282332
if seen.check_uint64_conflict():
23292333
return (values, None)
23302334

pandas/core/tools/numeric.py

+13 -4
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
2121
is_integer_dtype,
2222
is_number,
2323
is_numeric_dtype,
24-
is_object_dtype,
2524
is_scalar,
25+
is_string_dtype,
2626
needs_i8_conversion,
2727
)
2828
from pandas.core.dtypes.generic import (
@@ -32,6 +32,7 @@
3232

3333
import pandas as pd
3434
from pandas.core.arrays import BaseMaskedArray
35+
from pandas.core.arrays.string_ import StringDtype
3536

3637

3738
def to_numeric(
@@ -191,6 +192,8 @@ def to_numeric(
191192
else:
192193
values = arg
193194

195+
orig_values = values
196+
194197
# GH33013: for IntegerArray & FloatingArray extract non-null values for casting
195198
# save mask to reconstruct the full array after casting
196199
mask: npt.NDArray[np.bool_] | None = None
@@ -215,17 +218,23 @@ def to_numeric(
215218
values,
216219
set(),
217220
coerce_numeric=coerce_numeric,
218-
convert_to_masked_nullable=dtype_backend is not lib.no_default,
221+
convert_to_masked_nullable=dtype_backend is not lib.no_default
222+
or isinstance(values_dtype, StringDtype),
219223
)
220224
except (ValueError, TypeError):
221225
if errors == "raise":
222226
raise
227+
values = orig_values
223228

224229
if new_mask is not None:
225230
# Remove unnecessary values, is expected later anyway and enables
226231
# downcasting
227232
values = values[~new_mask]
228-
elif dtype_backend is not lib.no_default and new_mask is None:
233+
elif (
234+
dtype_backend is not lib.no_default
235+
and new_mask is None
236+
or isinstance(values_dtype, StringDtype)
237+
):
229238
new_mask = np.zeros(values.shape, dtype=np.bool_)
230239

231240
# attempt downcast only if the data has been successfully converted
@@ -260,7 +269,7 @@ def to_numeric(
260269

261270
# GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
262271
# masked array
263-
if (mask is not None or new_mask is not None) and not is_object_dtype(values.dtype):
272+
if (mask is not None or new_mask is not None) and not is_string_dtype(values.dtype):
264273
if mask is None:
265274
mask = new_mask
266275
else:

pandas/tests/tools/test_to_numeric.py

+24 -6
Original file line number | Diff line number | Diff line change
@@ -723,12 +723,12 @@ def test_precision_float_conversion(strrep):
723723
@pytest.mark.parametrize(
724724
"values, expected",
725725
[
726-
(["1", "2", None], Series([1, 2, np.nan])),
727-
(["1", "2", "3"], Series([1, 2, 3])),
728-
(["1", "2", 3], Series([1, 2, 3])),
729-
(["1", "2", 3.5], Series([1, 2, 3.5])),
730-
(["1", None, 3.5], Series([1, np.nan, 3.5])),
731-
(["1", "2", "3.5"], Series([1, 2, 3.5])),
726+
(["1", "2", None], Series([1, 2, np.nan], dtype="Int64")),
727+
(["1", "2", "3"], Series([1, 2, 3], dtype="Int64")),
728+
(["1", "2", 3], Series([1, 2, 3], dtype="Int64")),
729+
(["1", "2", 3.5], Series([1, 2, 3.5], dtype="Float64")),
730+
(["1", None, 3.5], Series([1, np.nan, 3.5], dtype="Float64")),
731+
(["1", "2", "3.5"], Series([1, 2, 3.5], dtype="Float64")),
732732
],
733733
)
734734
def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected):
@@ -738,6 +738,24 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
738738
tm.assert_series_equal(result, expected)
739739

740740

741+
def test_to_numeric_from_nullable_string_coerce(nullable_string_dtype):
742+
# GH#52146
743+
values = ["a", "1"]
744+
ser = Series(values, dtype=nullable_string_dtype)
745+
result = to_numeric(ser, errors="coerce")
746+
expected = Series([pd.NA, 1], dtype="Int64")
747+
tm.assert_series_equal(result, expected)
748+
749+
750+
def test_to_numeric_from_nullable_string_ignore(nullable_string_dtype):
751+
# GH#52146
752+
values = ["a", "1"]
753+
ser = Series(values, dtype=nullable_string_dtype)
754+
expected = ser.copy()
755+
result = to_numeric(ser, errors="ignore")
756+
tm.assert_series_equal(result, expected)
757+
758+
741759
@pytest.mark.parametrize(
742760
"data, input_dtype, downcast, expected_dtype",
743761
(

0 commit comments

Comments
 (0)