ENH: Add equivalence test for float in to_numeric() GH43693 (#43710)

jbencina · web-flow · commit 45460157b23f · 2022-01-17T08:48:35.000-05:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -33,6 +33,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
 - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`)
+- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -357,7 +357,17 @@ def trans(x):
         and not is_bool_dtype(result.dtype)
         and not is_string_dtype(result.dtype)
     ):
-        return result.astype(dtype)
+        new_result = result.astype(dtype)
+
+        # Adjust tolerances based on floating point size
+        size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16}
+
+        atol = size_tols.get(new_result.dtype.itemsize, 0.0)
+
+        # Check downcast float values are still equal within 7 digits when
+        # converting from float64 to float32
+        if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
+            return new_result
 
     return result
 
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
@@ -572,6 +572,14 @@ def test_downcast_limits(dtype, downcast, min_max):
     assert series.dtype == dtype
 
 
+def test_downcast_float64_to_float32():
+    # GH-43693: Check float64 preservation when >= 16,777,217
+    series = Series([16777217.0, np.finfo(np.float64).max, np.nan], dtype=np.float64)
+    result = to_numeric(series, downcast="float")
+
+    assert series.dtype == result.dtype
+
+
 @pytest.mark.parametrize(
     "ser,expected",
     [
@@ -762,6 +770,8 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
         ([-1, -1], "Int32", "unsigned", "Int32"),
         ([1, 1], "Float64", "float", "Float32"),
         ([1, 1.1], "Float64", "float", "Float32"),
+        ([1, 1], "Float32", "float", "Float32"),
+        ([1, 1.1], "Float32", "float", "Float32"),
     ),
 )
 def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@ Other enhancements`
`33`	`33`	`^^^^^^^^^^^^^^^^^^`
`34`	`34`	- :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`)
`35`	`35`	- Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`)
	`36`	+- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
`36`	`37`	`-`
`37`	`38`
`38`	`39`	`.. ---------------------------------------------------------------------------`