From 9f9eb18f49f29808bcc63ee9ce0c41471b330d89 Mon Sep 17 00:00:00 2001 From: John Bencina Date: Sun, 5 Dec 2021 17:49:10 +0000 Subject: [PATCH] ENH: Add equivalence test for float in to_numeric() GH43693 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/dtypes/cast.py | 12 +++++++++++- pandas/tests/tools/test_to_numeric.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 290f2e0ae08b6..045466ba9192e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -31,6 +31,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) +- :func:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8f7a20949a831..6466fc8db341b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -357,7 +357,17 @@ def trans(x): and not is_bool_dtype(result.dtype) and not is_string_dtype(result.dtype) ): - return result.astype(dtype) + new_result = result.astype(dtype) + + # Absolute tolerance keyed by the itemsize (bytes) of the downcast dtype + size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16} + + atol = size_tols.get(new_result.dtype.itemsize, 0.0) + + # Only accept the downcast if every value still equals the original + # within atol (NaNs compare equal); otherwise keep the input dtype + if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol): + return new_result return result diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 643a5617abbeb..b4db174c271d4 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -572,6 +572,14 @@ def test_downcast_limits(dtype, downcast, min_max): assert 
series.dtype == dtype +def test_downcast_float64_to_float32(): + # GH-43693: keep float64 when float32 cannot hold a value, e.g. 2**24 + 1 + series = Series([16777217.0, np.finfo(np.float64).max, np.nan], dtype=np.float64) + result = to_numeric(series, downcast="float") + + assert series.dtype == result.dtype + + @pytest.mark.parametrize( "ser,expected", [ @@ -762,6 +770,8 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected ([-1, -1], "Int32", "unsigned", "Int32"), ([1, 1], "Float64", "float", "Float32"), ([1, 1.1], "Float64", "float", "Float32"), + ([1, 1], "Float32", "float", "Float32"), + ([1, 1.1], "Float32", "float", "Float32"), ), ) def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):