Skip to content

Commit f81f687

Browse files
authored
DEPR: Enforce Series(float_with_nan, dtype=inty) (#49605)
* DEPR: Enforce Series(float_with_nan, dtype=inty) * update asv * troubleshoot asv * suggested asv edit
1 parent f3c46cd commit f81f687

File tree

6 files changed

+68
-82
lines changed

6 files changed

+68
-82
lines changed

asv_bench/benchmarks/groupby.py

+20-16
Original file line numberDiff line numberDiff line change
@@ -600,31 +600,35 @@ def time_frame_agg(self, dtype, method):
600600

601601

602602
class Cumulative:
603-
param_names = ["dtype", "method"]
603+
param_names = ["dtype", "method", "with_nans"]
604604
params = [
605605
["float64", "int64", "Float64", "Int64"],
606606
["cummin", "cummax", "cumsum"],
607+
[True, False],
607608
]
608609

609-
def setup(self, dtype, method):
610+
def setup(self, dtype, method, with_nans):
611+
if with_nans and dtype == "int64":
612+
raise NotImplementedError("Construction of df would raise")
613+
610614
N = 500_000
611-
vals = np.random.randint(-10, 10, (N, 5))
612-
null_vals = vals.astype(float, copy=True)
613-
null_vals[::2, :] = np.nan
614-
null_vals[::3, :] = np.nan
615-
df = DataFrame(vals, columns=list("abcde"), dtype=dtype)
616-
null_df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
617615
keys = np.random.randint(0, 100, size=N)
618-
df["key"] = keys
619-
null_df["key"] = keys
620-
self.df = df
621-
self.null_df = null_df
616+
vals = np.random.randint(-10, 10, (N, 5))
622617

623-
def time_frame_transform(self, dtype, method):
624-
self.df.groupby("key").transform(method)
618+
if with_nans:
619+
null_vals = vals.astype(float, copy=True)
620+
null_vals[::2, :] = np.nan
621+
null_vals[::3, :] = np.nan
622+
df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
623+
df["key"] = keys
624+
self.df = df
625+
else:
626+
df = DataFrame(vals, columns=list("abcde")).astype(dtype, copy=False)
627+
df["key"] = keys
628+
self.df = df
625629

626-
def time_frame_transform_many_nulls(self, dtype, method):
627-
self.null_df.groupby("key").transform(method)
630+
def time_frame_transform(self, dtype, method, with_nans):
631+
self.df.groupby("key").transform(method)
628632

629633

630634
class RankWithTies:

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,7 @@ Removal of prior version deprecations/changes
492492
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
493493
- Changed behavior of setitem-like operations (``__setitem__``, ``fillna``, ``where``, ``mask``, ``replace``, ``insert``, fill_value for ``shift``) on an object with :class:`DatetimeTZDtype` when using a value with a non-matching timezone, the value will be cast to the object's timezone instead of casting both to object-dtype (:issue:`44243`)
494494
- Changed behavior of :class:`Index`, :class:`Series`, :class:`DataFrame` constructors with floating-dtype data and a :class:`DatetimeTZDtype`, the data are now interpreted as UTC-times instead of wall-times, consistent with how integer-dtype data are treated (:issue:`45573`)
495+
- Changed behavior of :class:`Series` and :class:`DataFrame` constructors with integer dtype and floating-point data containing ``NaN``; this now raises ``IntCastingNaNError`` (:issue:`40110`)
495496
- Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`)
496497
- Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`)
497498
- Change the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`)

pandas/core/construction.py

+1-12
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
cast,
1515
overload,
1616
)
17-
import warnings
1817

1918
import numpy as np
2019
from numpy import ma
@@ -29,7 +28,6 @@
2928
T,
3029
)
3130
from pandas.errors import IntCastingNaNError
32-
from pandas.util._exceptions import find_stack_level
3331

3432
from pandas.core.dtypes.base import (
3533
ExtensionDtype,
@@ -577,16 +575,7 @@ def sanitize_array(
577575
subarr = maybe_cast_to_integer_array(data, dtype)
578576

579577
except IntCastingNaNError:
580-
warnings.warn(
581-
"In a future version, passing float-dtype values containing NaN "
582-
"and an integer dtype will raise IntCastingNaNError "
583-
"(subclass of ValueError) instead of silently ignoring the "
584-
"passed dtype. To retain the old behavior, call Series(arr) or "
585-
"DataFrame(arr) without passing a dtype.",
586-
FutureWarning,
587-
stacklevel=find_stack_level(),
588-
)
589-
subarr = np.array(data, copy=copy)
578+
raise
590579
except ValueError:
591580
# Pre-2.0, we would have different behavior for Series vs DataFrame.
592581
# DataFrame would call np.array(data, dtype=dtype, copy=copy),

pandas/tests/frame/test_constructors.py

+21-29
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pytest
1919
import pytz
2020

21+
from pandas.errors import IntCastingNaNError
2122
import pandas.util._test_decorators as td
2223

2324
from pandas.core.dtypes.common import is_integer_dtype
@@ -105,16 +106,13 @@ def test_constructor_dict_with_tzaware_scalar(self):
105106
def test_construct_ndarray_with_nas_and_int_dtype(self):
106107
# GH#26919 match Series by not casting np.nan to meaningless int
107108
arr = np.array([[1, np.nan], [2, 3]])
108-
with tm.assert_produces_warning(FutureWarning):
109-
df = DataFrame(arr, dtype="i8")
110-
assert df.values.dtype == arr.dtype
111-
assert isna(df.iloc[0, 1])
109+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
110+
with pytest.raises(IntCastingNaNError, match=msg):
111+
DataFrame(arr, dtype="i8")
112112

113113
# check this matches Series behavior
114-
with tm.assert_produces_warning(FutureWarning):
115-
ser = Series(arr[0], dtype="i8", name=0)
116-
expected = df.iloc[0]
117-
tm.assert_series_equal(ser, expected)
114+
with pytest.raises(IntCastingNaNError, match=msg):
115+
Series(arr[0], dtype="i8", name=0)
118116

119117
def test_construct_from_list_of_datetimes(self):
120118
df = DataFrame([datetime.now(), datetime.now()])
@@ -966,21 +964,16 @@ def _check_basic_constructor(self, empty):
966964
assert len(frame.index) == 3
967965
assert len(frame.columns) == 1
968966

969-
warn = None if empty is np.ones else FutureWarning
970-
with tm.assert_produces_warning(warn):
967+
if empty is not np.ones:
968+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
969+
with pytest.raises(IntCastingNaNError, match=msg):
970+
DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64)
971+
return
972+
else:
971973
frame = DataFrame(
972974
mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64
973975
)
974-
if empty is np.ones:
975-
# passing dtype casts
976976
assert frame.values.dtype == np.int64
977-
else:
978-
# i.e. ma.masked_all
979-
# Since we have NaNs, refuse to cast to int dtype, which would take NaN
980-
# to meaningless integers. This matches Series behavior. GH#26919
981-
assert frame.isna().all().all()
982-
assert frame.values.dtype == np.float64
983-
assert isna(frame.values).all()
984977

985978
# wrong size axis labels
986979
msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
@@ -1741,11 +1734,10 @@ def test_constructor_mix_series_nonseries(self, float_frame):
17411734
DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]})
17421735

17431736
def test_constructor_miscast_na_int_dtype(self):
1744-
msg = "float-dtype values containing NaN and an integer dtype"
1745-
with tm.assert_produces_warning(FutureWarning, match=msg):
1746-
df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)
1747-
expected = DataFrame([[np.nan, 1], [1, 0]])
1748-
tm.assert_frame_equal(df, expected)
1737+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
1738+
1739+
with pytest.raises(IntCastingNaNError, match=msg):
1740+
DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64)
17491741

17501742
def test_constructor_column_duplicates(self):
17511743
# it works! #2079
@@ -2722,16 +2714,16 @@ def test_floating_values_integer_dtype(self):
27222714

27232715
# with NaNs, we go through a different path, which raises IntCastingNaNError
27242716
arr[0, 0] = np.nan
2725-
msg = "passing float-dtype values containing NaN"
2726-
with tm.assert_produces_warning(FutureWarning, match=msg):
2717+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
2718+
with pytest.raises(IntCastingNaNError, match=msg):
27272719
DataFrame(arr, dtype="i8")
2728-
with tm.assert_produces_warning(FutureWarning, match=msg):
2720+
with pytest.raises(IntCastingNaNError, match=msg):
27292721
Series(arr[0], dtype="i8")
27302722
# The raising behavior matches what we would get via astype:
27312723
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
2732-
with pytest.raises(ValueError, match=msg):
2724+
with pytest.raises(IntCastingNaNError, match=msg):
27332725
DataFrame(arr).astype("i8")
2734-
with pytest.raises(ValueError, match=msg):
2726+
with pytest.raises(IntCastingNaNError, match=msg):
27352727
Series(arr[0]).astype("i8")
27362728

27372729

pandas/tests/series/test_constructors.py

+18-19
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
lib,
1616
)
1717
from pandas.compat import is_numpy_dev
18+
from pandas.errors import IntCastingNaNError
1819
import pandas.util._test_decorators as td
1920

2021
from pandas.core.dtypes.common import (
@@ -670,10 +671,9 @@ def test_constructor_sanitize(self):
670671
s = Series(np.array([1.0, 1.0, 8.0]), dtype="i8")
671672
assert s.dtype == np.dtype("i8")
672673

673-
msg = "float-dtype values containing NaN and an integer dtype"
674-
with tm.assert_produces_warning(FutureWarning, match=msg):
675-
ser = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8")
676-
assert ser.dtype == np.dtype("f8")
674+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
675+
with pytest.raises(IntCastingNaNError, match=msg):
676+
Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8")
677677

678678
def test_constructor_copy(self):
679679
# GH15125
@@ -809,18 +809,17 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
809809
res = frame_or_series(list(arr), dtype="i8")
810810
tm.assert_equal(res, expected)
811811

812-
# When we have NaNs, we silently ignore the integer dtype
812+
# pre-2.0, when we had NaNs, we silently ignored the integer dtype
813813
arr[0] = np.nan
814814
expected = frame_or_series(arr)
815-
msg = "passing float-dtype values containing NaN and an integer dtype"
816-
with tm.assert_produces_warning(FutureWarning, match=msg):
817-
obj = frame_or_series(arr, dtype="i8")
818-
tm.assert_equal(obj, expected)
819815

820-
with tm.assert_produces_warning(FutureWarning, match=msg):
816+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
817+
with pytest.raises(IntCastingNaNError, match=msg):
818+
frame_or_series(arr, dtype="i8")
819+
820+
with pytest.raises(IntCastingNaNError, match=msg):
821821
# same behavior if we pass list instead of the ndarray
822-
obj = frame_or_series(list(arr), dtype="i8")
823-
tm.assert_equal(obj, expected)
822+
frame_or_series(list(arr), dtype="i8")
824823

825824
# float array that can be losslessly cast to integers
826825
arr = np.array([1.0, 2.0], dtype="float64")
@@ -854,13 +853,13 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
854853
# Updated: make sure we treat this list the same as we would treat the
855854
# equivalent ndarray
856855
vals = [1, 2, np.nan]
857-
msg = "In a future version, passing float-dtype values containing NaN"
858-
with tm.assert_produces_warning(FutureWarning, match=msg):
859-
res = Series(vals, dtype=any_int_numpy_dtype)
860-
with tm.assert_produces_warning(FutureWarning, match=msg):
861-
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
862-
tm.assert_series_equal(res, expected)
863-
assert np.isnan(expected.iloc[-1])
856+
# pre-2.0 this would return with a float dtype, in 2.0 we raise
857+
858+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
859+
with pytest.raises(IntCastingNaNError, match=msg):
860+
Series(vals, dtype=any_int_numpy_dtype)
861+
with pytest.raises(IntCastingNaNError, match=msg):
862+
Series(np.array(vals), dtype=any_int_numpy_dtype)
864863

865864
def test_constructor_dtype_no_cast(self):
866865
# see gh-1572

pandas/tests/test_downstream.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas.errors import IntCastingNaNError
1112
import pandas.util._test_decorators as td
1213

1314
import pandas as pd
@@ -100,13 +101,13 @@ def test_construct_dask_float_array_int_dtype_match_ndarray():
100101
expected = Series(arr, dtype="i8")
101102
tm.assert_series_equal(res, expected)
102103

103-
msg = "In a future version, passing float-dtype values containing NaN"
104+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
104105
arr[2] = np.nan
105-
with tm.assert_produces_warning(FutureWarning, match=msg):
106-
res = Series(darr, dtype="i8")
107-
with tm.assert_produces_warning(FutureWarning, match=msg):
108-
expected = Series(arr, dtype="i8")
109-
tm.assert_series_equal(res, expected)
106+
with pytest.raises(IntCastingNaNError, match=msg):
107+
Series(darr, dtype="i8")
108+
# which is the same as we get with a numpy input
109+
with pytest.raises(IntCastingNaNError, match=msg):
110+
Series(arr, dtype="i8")
110111

111112

112113
def test_xarray(df):

0 commit comments

Comments
 (0)