From d85ff1259d0e8285aea2ab123c64d5e7d5dace6b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Dec 2021 18:40:59 -0800 Subject: [PATCH 1/4] DEPR: Series([np.nan], dtype='i8') silently ignoring dtype --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/construction.py | 9 ++++++++ pandas/tests/frame/test_constructors.py | 29 +++++++++++++++++++----- pandas/tests/frame/test_stack_unstack.py | 2 +- pandas/tests/series/test_constructors.py | 6 +++-- 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 3924191bebcfd..0df3391052685 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -619,8 +619,10 @@ Other Deprecations - Deprecated ``numeric_only=None`` in :meth:`DataFrame.rank`; in a future version ``numeric_only`` must be either ``True`` or ``False`` (the default) (:issue:`45036`) - Deprecated the behavior of :meth:`Timestamp.utcfromtimestamp`, in the future it will return a timezone-aware UTC :class:`Timestamp` (:issue:`22451`) - Deprecated :meth:`NaT.freq` (:issue:`45071`) +- Deprecated the behavior of :class:`Series` and :class:`DataFrame` construction silently ignoring the ``dtype`` argument when passed float-dtype data containing ``NaN`` together with an integer dtype; in a future version this will raise (:issue:`40110`) - + .. --------------------------------------------------------------------------- .. _whatsnew_140.performance: diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 17fa2d6e2f388..e496125683c09 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -535,6 +535,15 @@ def sanitize_array( try: subarr = _try_cast(data, dtype, copy, True) except IntCastingNaNError: + warnings.warn( + "In a future version, passing float-dtype values containing NaN " + "and an integer dtype will raise IntCastingNaNError " + "(subclass of ValueError) instead of silently ignoring the " + "passed dtype. 
To retain the old behavior, call Series(arr) or " + "DataFrame(arr) without passing a dtype.", + FutureWarning, + stacklevel=find_stack_level(), + ) subarr = np.array(data, copy=copy) except ValueError: if not raise_cast_failure: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7cf2721621a03..7f030fc11a20b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -97,12 +97,14 @@ def test_constructor_dict_with_tzaware_scalar(self): def test_construct_ndarray_with_nas_and_int_dtype(self): # GH#26919 match Series by not casting np.nan to meaningless int arr = np.array([[1, np.nan], [2, 3]]) - df = DataFrame(arr, dtype="i8") + with tm.assert_produces_warning(FutureWarning): + df = DataFrame(arr, dtype="i8") assert df.values.dtype == arr.dtype assert isna(df.iloc[0, 1]) # check this matches Series behavior - ser = Series(arr[0], dtype="i8", name=0) + with tm.assert_produces_warning(FutureWarning): + ser = Series(arr[0], dtype="i8", name=0) expected = df.iloc[0] tm.assert_series_equal(ser, expected) @@ -937,7 +939,11 @@ def _check_basic_constructor(self, empty): assert len(frame.index) == 3 assert len(frame.columns) == 1 - frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64) + warn = None if empty is np.ones else FutureWarning + with tm.assert_produces_warning(warn): + frame = DataFrame( + mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64 + ) if empty is np.ones: # passing dtype casts assert frame.values.dtype == np.int64 @@ -1766,7 +1772,9 @@ def test_constructor_mix_series_nonseries(self, float_frame): DataFrame({"A": float_frame["A"], "B": list(float_frame["B"])[:-2]}) def test_constructor_miscast_na_int_dtype(self): - df = DataFrame([[np.nan, 1], [1, 0]], dtype=np.int64) + msg = "float-dtype values containing NaN and an integer dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([[np.nan, 1], [1, 0]], 
dtype=np.int64) expected = DataFrame([[np.nan, 1], [1, 0]]) tm.assert_frame_equal(df, expected) @@ -2713,10 +2721,19 @@ def test_floating_values_integer_dtype(self): # if they can be cast losslessly, no warning DataFrame(arr.round(), dtype="i8") - # with NaNs, we already have the correct behavior, so no warning + # with NaNs, we go through a different path with a different warning arr[0, 0] = np.nan - with tm.assert_produces_warning(None): + msg = "passing float-dtype values containing NaN" + with tm.assert_produces_warning(FutureWarning, match=msg): DataFrame(arr, dtype="i8") + with tm.assert_produces_warning(FutureWarning, match=msg): + Series(arr[0], dtype="i8") + # The future (raising) behavior matches what we would get via astype: + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + DataFrame(arr).astype("i8") + with pytest.raises(ValueError, match=msg): + Series(arr[0]).astype("i8") class TestDataFrameConstructorWithDatetimeTZ: diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index eecae31bec914..1a91b1b609b32 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1046,7 +1046,7 @@ def _test_stack_with_multiindex(multiindex): names=[None, "Lower"], ), columns=Index(["B", "C"], name="Upper"), - dtype=df.dtypes[0], + # dtype=df.dtypes[0], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 00a958f58cc93..7300b8f03ade6 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -652,8 +652,10 @@ def test_constructor_sanitize(self): s = Series(np.array([1.0, 1.0, 8.0]), dtype="i8") assert s.dtype == np.dtype("i8") - s = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8") - assert s.dtype == np.dtype("f8") + msg = "float-dtype values containing NaN and an integer 
dtype" + with tm.assert_produces_warning(FutureWarning, match=msg): + ser = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8") + assert ser.dtype == np.dtype("f8") def test_constructor_copy(self): # GH15125 From c0e7258e4cbb71d12452d060089dd28fe354defe Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Dec 2021 18:43:09 -0800 Subject: [PATCH 2/4] remove commented-out --- pandas/tests/frame/test_stack_unstack.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 1a91b1b609b32..b1f308fbb1c27 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1046,7 +1046,6 @@ def _test_stack_with_multiindex(multiindex): names=[None, "Lower"], ), columns=Index(["B", "C"], name="Upper"), - # dtype=df.dtypes[0], ) tm.assert_frame_equal(result, expected) From 5e1e67df6e0cf4fce91785450f32d9f54b1225c5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Dec 2021 21:23:22 -0800 Subject: [PATCH 3/4] 32bit compat --- pandas/tests/frame/test_stack_unstack.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index b1f308fbb1c27..a1e392b62c685 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1047,6 +1047,7 @@ def _test_stack_with_multiindex(multiindex): ), columns=Index(["B", "C"], name="Upper"), ) + expected[0] = expected[0].astype(df.dtypes[0]) # need on 32bit builds tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("ordered", [False, True]) From 49f21f98435bc078e91efd3c8b751660e4ef9e61 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 31 Dec 2021 07:59:07 -0800 Subject: [PATCH 4/4] typo fixup --- pandas/tests/frame/test_stack_unstack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 
a1e392b62c685..e73885ebcc2c8 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1047,7 +1047,7 @@ def _test_stack_with_multiindex(multiindex): ), columns=Index(["B", "C"], name="Upper"), ) - expected[0] = expected[0].astype(df.dtypes[0]) # need on 32bit builds + expected["B"] = expected["B"].astype(df.dtypes[0]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("ordered", [False, True])