API: Series(floaty, dtype=inty) (#49609)

jbrockmendel · web-flow · commit 00fa270c0553 · 2022-12-15T14:18:58.000+01:00
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -476,6 +476,7 @@ Other API changes
 - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`)
 - Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`)
 - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
+- Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data containing non-round values, this now raises ``ValueError`` instead of silently retaining the float dtype; use ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`)
 - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)
 - Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
 - Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`)
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -27,7 +27,6 @@
     DtypeObj,
     T,
 )
-from pandas.errors import IntCastingNaNError
 
 from pandas.core.dtypes.base import (
     ExtensionDtype,
@@ -46,7 +45,6 @@
     is_datetime64_ns_dtype,
     is_dtype_equal,
     is_extension_array_dtype,
-    is_float_dtype,
     is_integer_dtype,
     is_list_like,
     is_object_dtype,
@@ -503,7 +501,6 @@ def sanitize_array(
     copy: bool = False,
     *,
     allow_2d: bool = False,
-    strict_ints: bool = False,
 ) -> ArrayLike:
     """
     Sanitize input data to an ndarray or ExtensionArray, copy if specified,
@@ -517,8 +514,6 @@ def sanitize_array(
     copy : bool, default False
     allow_2d : bool, default False
         If False, raise if we have a 2D Arraylike.
-    strict_ints : bool, default False
-        If False, silently ignore failures to cast float data to int dtype.
 
     Returns
     -------
@@ -571,32 +566,7 @@ def sanitize_array(
         if isinstance(data, np.matrix):
             data = data.A
 
-        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
-            # possibility of nan -> garbage
-            try:
-                # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
-                # casting aligning with IntCastingNaNError below
-                with np.errstate(invalid="ignore"):
-                    # GH#15832: Check if we are requesting a numeric dtype and
-                    # that we can convert the data to the requested dtype.
-                    subarr = maybe_cast_to_integer_array(data, dtype)
-
-            except IntCastingNaNError:
-                raise
-            except ValueError:
-                # Pre-2.0, we would have different behavior for Series vs DataFrame.
-                #  DataFrame would call np.array(data, dtype=dtype, copy=copy),
-                #  which would cast to the integer dtype even if the cast is lossy.
-                #  See GH#40110.
-                if strict_ints:
-                    raise
-
-                # We ignore the dtype arg and return floating values,
-                #  e.g. test_constructor_floating_data_int_dtype
-                # TODO: where is the discussion that documents the reason for this?
-                subarr = np.array(data, copy=copy)
-
-        elif dtype is None:
+        if dtype is None:
             subarr = data
             if data.dtype == object:
                 subarr = maybe_infer_to_datetimelike(data)
@@ -629,27 +599,8 @@ def sanitize_array(
             subarr = np.array([], dtype=np.float64)
 
         elif dtype is not None:
-            try:
-                subarr = _try_cast(data, dtype, copy)
-            except ValueError:
-                if is_integer_dtype(dtype):
-                    if strict_ints:
-                        raise
-                    casted = np.array(data, copy=False)
-                    if casted.dtype.kind == "f":
-                        # GH#40110 match the behavior we have if we passed
-                        #  a ndarray[float] to begin with
-                        return sanitize_array(
-                            casted,
-                            index,
-                            dtype,
-                            copy=False,
-                            allow_2d=allow_2d,
-                        )
-                    else:
-                        raise
-                else:
-                    raise
+            subarr = _try_cast(data, dtype, copy)
+
         else:
             subarr = maybe_convert_platform(data)
             if subarr.dtype == object:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -499,7 +499,7 @@ def __new__(
                 data = com.asarray_tuplesafe(data, dtype=_dtype_obj)
 
         try:
-            arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
+            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
         except ValueError as err:
             if "index must be specified when data is not list-like" in str(err):
                 raise cls._raise_scalar_data_error(data) from err
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2706,11 +2706,12 @@ def test_floating_values_integer_dtype(self):
 
         arr = np.random.randn(10, 5)
 
-        # as of 2.0, we match Series behavior by retaining float dtype instead
-        #  of doing a lossy conversion here. Below we _do_ do the conversion
-        #  since it is lossless.
-        df = DataFrame(arr, dtype="i8")
-        assert (df.dtypes == "f8").all()
+        # GH#49599 in 2.0 we raise instead of either
+        #  a) silently ignoring dtype and returning float (the old Series behavior) or
+        #  b) rounding (the old DataFrame behavior)
+        msg = "Trying to coerce float values to integers"
+        with pytest.raises(ValueError, match=msg):
+            DataFrame(arr, dtype="i8")
 
         df = DataFrame(arr.round(), dtype="i8")
         assert (df.dtypes == "i8").all()
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -801,11 +801,13 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
         #  not clear if this is what we want long-term
         expected = frame_or_series(arr)
 
-        res = frame_or_series(arr, dtype="i8")
-        tm.assert_equal(res, expected)
+        # GH#49599 as of 2.0 we raise instead of silently retaining float dtype
+        msg = "Trying to coerce float values to integer"
+        with pytest.raises(ValueError, match=msg):
+            frame_or_series(arr, dtype="i8")
 
-        res = frame_or_series(list(arr), dtype="i8")
-        tm.assert_equal(res, expected)
+        with pytest.raises(ValueError, match=msg):
+            frame_or_series(list(arr), dtype="i8")
 
         # pre-2.0, when we had NaNs, we silently ignored the integer dtype
         arr[0] = np.nan
@@ -815,7 +817,12 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
         with pytest.raises(IntCastingNaNError, match=msg):
             frame_or_series(arr, dtype="i8")
 
-        with pytest.raises(IntCastingNaNError, match=msg):
+        exc = IntCastingNaNError
+        if frame_or_series is Series:
+            # TODO: try to align these
+            exc = ValueError
+            msg = "cannot convert float NaN to integer"
+        with pytest.raises(exc, match=msg):
             # same behavior if we pass list instead of the ndarray
             frame_or_series(list(arr), dtype="i8")
 
@@ -833,13 +840,14 @@ def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
         # see gh-15832
         # Updated: make sure we treat this list the same as we would treat
         #  the equivalent ndarray
+        # GH#49599 pre-2.0 we silently retained float dtype, in 2.0 we raise
         vals = [1, 2, 3.5]
 
-        res = Series(vals, dtype=any_int_numpy_dtype)
-        expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
-        tm.assert_series_equal(res, expected)
-        alt = Series(np.array(vals))  # i.e. we ignore the dtype kwd
-        tm.assert_series_equal(alt, expected)
+        msg = "Trying to coerce float values to integer"
+        with pytest.raises(ValueError, match=msg):
+            Series(vals, dtype=any_int_numpy_dtype)
+        with pytest.raises(ValueError, match=msg):
+            Series(np.array(vals), dtype=any_int_numpy_dtype)
 
     def test_constructor_coerce_float_valid(self, float_numpy_dtype):
         s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
@@ -853,9 +861,10 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
         vals = [1, 2, np.nan]
         # pre-2.0 this would return with a float dtype, in 2.0 we raise
 
-        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
-        with pytest.raises(IntCastingNaNError, match=msg):
+        msg = "cannot convert float NaN to integer"
+        with pytest.raises(ValueError, match=msg):
             Series(vals, dtype=any_int_numpy_dtype)
+        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
         with pytest.raises(IntCastingNaNError, match=msg):
             Series(np.array(vals), dtype=any_int_numpy_dtype)
 
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
@@ -95,9 +95,10 @@ def test_construct_dask_float_array_int_dtype_match_ndarray():
     expected = Series(arr)
     tm.assert_series_equal(res, expected)
 
-    res = Series(darr, dtype="i8")
-    expected = Series(arr, dtype="i8")
-    tm.assert_series_equal(res, expected)
+    # GH#49599 in 2.0 we raise instead of silently ignoring the dtype
+    msg = "Trying to coerce float values to integers"
+    with pytest.raises(ValueError, match=msg):
+        Series(darr, dtype="i8")
 
     msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
     arr[2] = np.nan