ENH: Add equivalence test for float in to_numeric() GH43693

jbencina · jbencina · commit b0dfbaad9a68 · 2021-12-05T18:03:26.000Z
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -228,7 +228,7 @@ Other enhancements
 - :meth:`.Rolling.var`, :meth:`.Expanding.var`, :meth:`.Rolling.std`, :meth:`.Expanding.std` now support `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`44461`)
 - :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`)
 - :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`)
--
+- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
 
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -352,7 +352,17 @@ def trans(x):
         and not is_bool_dtype(result.dtype)
         and not is_string_dtype(result.dtype)
     ):
-        return result.astype(dtype)
+        new_result = result.astype(dtype)
+
+        # Adjust tolerances based on floating point size
+        size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16}
+
+        atol = size_tols.get(new_result.dtype.itemsize, 0.0)
+
+        # Check downcast float values are still equal within 7 digits when
+        # converting from float64 to float32
+        if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
+            return new_result
 
     return result
 
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
@@ -572,6 +572,14 @@ def test_downcast_limits(dtype, downcast, min_max):
     assert series.dtype == dtype
 
 
+def test_downcast_float64_to_float32():
+    # GH-43693: Check float64 preservation when >= 16,777,217
+    series = Series([16777217.0, np.finfo(np.float64).max, np.nan], dtype=np.float64)
+    result = to_numeric(series, downcast="float")
+
+    assert series.dtype == result.dtype
+
+
 @pytest.mark.parametrize(
     "ser,expected",
     [
@@ -762,6 +770,8 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
         ([-1, -1], "Int32", "unsigned", "Int32"),
         ([1, 1], "Float64", "float", "Float32"),
         ([1, 1.1], "Float64", "float", "Float32"),
+        ([1, 1], "Float32", "float", "Float32"),
+        ([1, 1.1], "Float32", "float", "Float32"),
     ),
 )
 def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):