ENH: Add equivalence test for float in to_numeric() GH43693

jbencina · jbencina · commit 6a037b183841 · 2022-01-08T04:25:02.000Z
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -225,6 +225,7 @@ Other enhancements
 - :class:`ExtensionDtype` and :class:`ExtensionArray` are now (de)serialized when exporting a :class:`DataFrame` with :meth:`DataFrame.to_json` using ``orient='table'`` (:issue:`20612`, :issue:`44705`).
 - Add support for `Zstandard <http://facebook.github.io/zstd/>`_ compression to :meth:`DataFrame.to_pickle`/:meth:`read_pickle` and friends (:issue:`43925`)
 - :meth:`DataFrame.to_sql` now returns an ``int`` of the number of written rows (:issue:`23998`)
+- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -354,7 +354,17 @@ def trans(x):
         and not is_bool_dtype(result.dtype)
         and not is_string_dtype(result.dtype)
     ):
-        return result.astype(dtype)
+        new_result = result.astype(dtype)
+
+        # Adjust tolerances based on floating point size
+        size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16}
+
+        atol = size_tols.get(new_result.dtype.itemsize, 0.0)
+
+        # Check downcast float values are still equal within 7 digits when
+        # converting from float64 to float32
+        if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
+            return new_result
 
     return result
 
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
@@ -572,6 +572,14 @@ def test_downcast_limits(dtype, downcast, min_max):
     assert series.dtype == dtype
 
 
+def test_downcast_float64_to_float32():
+    # GH-43693: Check float64 preservation when >= 16,777,217
+    series = Series([16777217.0, np.finfo(np.float64).max, np.nan], dtype=np.float64)
+    result = to_numeric(series, downcast="float")
+
+    assert series.dtype == result.dtype
+
+
 @pytest.mark.parametrize(
     "ser,expected",
     [
@@ -762,6 +770,8 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
         ([-1, -1], "Int32", "unsigned", "Int32"),
         ([1, 1], "Float64", "float", "Float32"),
         ([1, 1.1], "Float64", "float", "Float32"),
+        ([1, 1], "Float32", "float", "Float32"),
+        ([1, 1.1], "Float32", "float", "Float32"),
     ),
 )
 def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):