Skip to content

Commit 4a73b8c

Browse files
jbrockmendel authored and TLouf committed
DEPR: silent overflow on Series construction (pandas-dev#41734)
1 parent 17044a5 commit 4a73b8c

File tree

4 files changed

+32
-2
lines changed

4 files changed

+32
-2
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ Deprecations
698698
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
699699
- Deprecated passing arguments as positional in :meth:`DataFrame.reset_index` (other than ``"level"``) and :meth:`Series.reset_index` (:issue:`41485`)
700700
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)
701+
- Deprecated behavior of :class:`Series` construction with large-integer values and small-integer dtype silently overflowing; use ``Series(data).astype(dtype)`` instead (:issue:`41734`)
701702
- Deprecated inference of ``timedelta64[ns]``, ``datetime64[ns]``, or ``DatetimeTZDtype`` dtypes in :class:`Series` construction when data containing strings is passed and no ``dtype`` is passed (:issue:`33558`)
702703
- In a future version, constructing :class:`Series` or :class:`DataFrame` with ``datetime64[ns]`` data and ``DatetimeTZDtype`` will treat the data as wall-times instead of as UTC times (matching DatetimeIndex behavior). To treat the data as UTC times, use ``pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)`` or ``pd.Series(data.view("int64"), dtype=dtype)`` (:issue:`33401`)
703704
- Deprecated passing arguments as positional in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis` (other than ``"labels"``) (:issue:`41485`)

pandas/core/dtypes/cast.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -2036,7 +2036,7 @@ def construct_1d_ndarray_preserving_na(
20362036

20372037
def maybe_cast_to_integer_array(
20382038
arr: list | np.ndarray, dtype: np.dtype, copy: bool = False
2039-
):
2039+
) -> np.ndarray:
20402040
"""
20412041
Takes any dtype and returns the casted version, raising for when data is
20422042
incompatible with integer/unsigned integer dtypes.
@@ -2107,6 +2107,20 @@ def maybe_cast_to_integer_array(
21072107
if is_float_dtype(arr.dtype) or is_object_dtype(arr.dtype):
21082108
raise ValueError("Trying to coerce float values to integers")
21092109

2110+
if casted.dtype < arr.dtype:
2111+
# GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
2112+
warnings.warn(
2113+
f"Values are too large to be losslessly cast to {dtype}. "
2114+
"In a future version this will raise OverflowError. To retain the "
2115+
f"old behavior, use pd.Series(values).astype({dtype})",
2116+
FutureWarning,
2117+
stacklevel=find_stack_level(),
2118+
)
2119+
return casted
2120+
2121+
# No known cases that get here, but raising explicitly to cover our bases.
2122+
raise ValueError(f"values cannot be losslessly cast to {dtype}")
2123+
21102124

21112125
def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar:
21122126
"""

pandas/tests/frame/test_stack_unstack.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def test_unstack_preserve_dtypes(self):
358358
"E": Series([1.0, 50.0, 100.0]).astype("float32"),
359359
"F": Series([3.0, 4.0, 5.0]).astype("float64"),
360360
"G": False,
361-
"H": Series([1, 200, 923442], dtype="int8"),
361+
"H": Series([1, 200, 923442]).astype("int8"),
362362
}
363363
)
364364

pandas/tests/series/test_constructors.py

+15
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,21 @@ def test_constructor_cast(self):
711711
with pytest.raises(ValueError, match=msg):
712712
Series(["a", "b", "c"], dtype=float)
713713

714+
def test_constructor_signed_int_overflow_deprecation(self):
715+
# GH#41734 disallow silent overflow
716+
msg = "Values are too large to be losslessly cast"
717+
with tm.assert_produces_warning(FutureWarning, match=msg):
718+
ser = Series([1, 200, 923442], dtype="int8")
719+
720+
expected = Series([1, -56, 50], dtype="int8")
721+
tm.assert_series_equal(ser, expected)
722+
723+
with tm.assert_produces_warning(FutureWarning, match=msg):
724+
ser = Series([1, 200, 923442], dtype="uint8")
725+
726+
expected = Series([1, 200, 50], dtype="uint8")
727+
tm.assert_series_equal(ser, expected)
728+
714729
def test_constructor_unsigned_dtype_overflow(self, uint_dtype):
715730
# see gh-15832
716731
msg = "Trying to coerce negative values to unsigned integers"

0 commit comments

Comments
 (0)