From fb51b69907d7d67464148adb55fd7882e2b693c0 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 12 Mar 2024 10:18:18 +0100 Subject: [PATCH 1/8] remove interpolate with object dtype --- pandas/core/generic.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a7a69a6b835fb..427683907e7da 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7848,13 +7848,8 @@ def interpolate( if np.any(obj.dtypes == object): # GH#53631 if not (obj.ndim == 2 and np.all(obj.dtypes == object)): - # don't warn in cases that already raise - warnings.warn( - f"{type(self).__name__}.interpolate with object dtype is " - "deprecated and will raise in a future version. Call " - "obj.infer_objects(copy=False) before interpolating instead.", - FutureWarning, - stacklevel=find_stack_level(), + raise TypeError( + f"{type(self).__name__} cannot interpolate with object dtype." ) if method in fillna_methods and "fill_value" in kwargs: From b2ee587b4268473743a2374efcb4a1f0837b5aa8 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 12 Mar 2024 14:24:01 +0100 Subject: [PATCH 2/8] enforce deprecation interpolate with object dtype, correct tests --- pandas/tests/copy_view/test_interp_fillna.py | 16 ++----- .../tests/frame/methods/test_interpolate.py | 43 +++++++------------ .../tests/series/methods/test_interpolate.py | 6 +-- 3 files changed, 22 insertions(+), 43 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index e88896c9ec8c2..8fe58e59b9cfd 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -111,20 +111,12 @@ def test_interp_fill_functions_inplace(func, dtype): assert view._mgr._has_no_reference(0) -def test_interpolate_cleaned_fill_method(): - # Check that "method is set to None" case works correctly +def test_interpolate_cannot_with_object_dtype(): df = DataFrame({"a": ["a", np.nan, "c"], "b": 1}) - df_orig = df.copy() - - msg = "DataFrame.interpolate with object dtype" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.interpolate(method="linear") - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - result.iloc[0, 0] = Timestamp("2021-12-31") - - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) + msg = "DataFrame cannot interpolate with object dtype" + with pytest.raises(TypeError, match=msg): + df.interpolate() def test_interpolate_object_convert_no_op(): diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 73ba3545eaadb..e26df3f8929b2 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -76,29 +76,14 @@ def test_interp_basic(self): "D": list("abcd"), } ) - expected = DataFrame( - { - "A": [1.0, 2.0, 3.0, 4.0], - "B": [1.0, 4.0, 9.0, 9.0], - "C": [1, 2, 3, 5], - "D": list("abcd"), - } - ) - msg = "DataFrame.interpolate with object dtype" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.interpolate() - tm.assert_frame_equal(result, expected) - - # check we didn't operate inplace GH#45791 + msg = "DataFrame cannot interpolate with object dtype" + with pytest.raises(TypeError, match=msg): + df.interpolate() + cvalues = df["C"]._values dvalues = df["D"].values - assert np.shares_memory(cvalues, result["C"]._values) - assert np.shares_memory(dvalues, result["D"]._values) - - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df.interpolate(inplace=True) - assert res is None - tm.assert_frame_equal(df, expected) + with pytest.raises(TypeError, match=msg): + df.interpolate(inplace=True) # check we DID operate inplace assert np.shares_memory(df["C"]._values, cvalues) @@ -117,14 +102,16 @@ def test_interp_basic_with_non_range_index(self, using_infer_string): } ) - msg = "DataFrame.interpolate with object dtype" - warning = FutureWarning if not using_infer_string else None - with tm.assert_produces_warning(warning, match=msg): + msg = "DataFrame cannot interpolate with object dtype" + if not using_infer_string: + with pytest.raises(TypeError, match=msg): + df.set_index("C").interpolate() + else: result = df.set_index("C").interpolate() - expected = df.set_index("C") - expected.loc[3, "A"] = 3 - expected.loc[5, "B"] = 9 - tm.assert_frame_equal(result, expected) + expected = df.set_index("C") + expected.loc[3, "A"] = 3 + expected.loc[5, "B"] = 9 + tm.assert_frame_equal(result, expected) def test_interp_empty(self): # https://github.com/pandas-dev/pandas/issues/35598 diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index db101d87a282f..e4726f3ec6b32 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -846,10 +846,10 @@ def test_interpolate_unsorted_index(self, ascending, expected_values): def test_interpolate_asfreq_raises(self): ser = Series(["a", None, "b"], dtype=object) - msg2 = "Series.interpolate with object dtype" + msg2 = "Series cannot interpolate with object dtype" msg = "Invalid fill method" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg2): + with pytest.raises(TypeError, match=msg2): + with pytest.raises(ValueError, match=msg): ser.interpolate(method="asfreq") def test_interpolate_fill_value(self): From 3ae7d759d851e9195bfd71e244470e0d1a73968f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 12 Mar 2024 14:33:23 +0100 Subject: [PATCH 3/8] fix ruff error --- pandas/tests/frame/methods/test_interpolate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index e26df3f8929b2..2dcfe826569f8 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -79,7 +79,7 @@ def test_interp_basic(self): msg = "DataFrame cannot interpolate with object dtype" with pytest.raises(TypeError, match=msg): df.interpolate() - + cvalues = df["C"]._values dvalues = df["D"].values with pytest.raises(TypeError, match=msg): From cb2179f2c2bf822a8da0f787817c04a3dc5450d0 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 12 Mar 2024 14:36:51 +0100 Subject: [PATCH 4/8] add a note to v3.0.0 --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 391553909383b..ba99c304c760e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -201,6 +201,7 @@ Removal of prior version deprecations/changes - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`) +- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) - Enforced deprecation of string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`) From ea76776a61c66d68b07c576a5ab7a400f7f5d03f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Wed, 13 Mar 2024 23:17:00 +0100 Subject: [PATCH 5/8] combine two conditions --- pandas/core/generic.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 427683907e7da..94065ba94ecb8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7845,12 +7845,13 @@ def interpolate( obj, should_transpose = self, False else: obj, should_transpose = (self.T, True) if axis == 1 else (self, False) - if np.any(obj.dtypes == object): - # GH#53631 - if not (obj.ndim == 2 and np.all(obj.dtypes == object)): - raise TypeError( - f"{type(self).__name__} cannot interpolate with object dtype." - ) + # GH#53631 + if np.any(obj.dtypes == object) and ( + obj.ndim == 1 or not np.all(obj.dtypes == object) + ): + raise TypeError( + f"{type(self).__name__} cannot interpolate with object dtype." + ) if method in fillna_methods and "fill_value" in kwargs: raise ValueError( From 823e467f937c22b22567e2517c38bea6a3943b31 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 14 Mar 2024 22:15:56 +0100 Subject: [PATCH 6/8] change blocks of if statements to avoid duplicate checks --- pandas/core/generic.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 94065ba94ecb8..3073c61f51161 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7846,12 +7846,21 @@ def interpolate( else: obj, should_transpose = (self.T, True) if axis == 1 else (self, False) # GH#53631 - if np.any(obj.dtypes == object) and ( - obj.ndim == 1 or not np.all(obj.dtypes == object) - ): + if obj.ndim == 1 and obj.dtype == object: raise TypeError( f"{type(self).__name__} cannot interpolate with object dtype." ) + if obj.ndim == 2: + if np.all(obj.dtypes == object): + raise TypeError( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + elif np.any(obj.dtypes == object): + raise TypeError( + f"{type(self).__name__} cannot interpolate with object dtype." + ) if method in fillna_methods and "fill_value" in kwargs: raise ValueError( @@ -7867,13 +7876,6 @@ def interpolate( limit_direction = missing.infer_limit_direction(limit_direction, method) - if obj.ndim == 2 and np.all(obj.dtypes == object): - raise TypeError( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) - if method.lower() in fillna_methods: # TODO(3.0): remove this case # TODO: warn/raise on limit_direction or kwargs which are ignored? From e3c56a8ccc262249dc6f72a1c80592325f46f449 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 15 Mar 2024 13:34:55 +0100 Subject: [PATCH 7/8] replace err msg containing 'Try setting at least one column to a numeric dtype' --- pandas/core/generic.py | 15 +++------------ pandas/tests/frame/methods/test_interpolate.py | 12 ++---------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3073c61f51161..611135f270fb4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7846,21 +7846,12 @@ def interpolate( else: obj, should_transpose = (self.T, True) if axis == 1 else (self, False) # GH#53631 - if obj.ndim == 1 and obj.dtype == object: + if (obj.ndim == 1 and obj.dtype == object) or ( + obj.ndim == 2 and np.any(obj.dtypes == object) + ): raise TypeError( f"{type(self).__name__} cannot interpolate with object dtype." ) - if obj.ndim == 2: - if np.all(obj.dtypes == object): - raise TypeError( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) - elif np.any(obj.dtypes == object): - raise TypeError( - f"{type(self).__name__} cannot interpolate with object dtype." - ) if method in fillna_methods and "fill_value" in kwargs: raise ValueError( diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 2dcfe826569f8..2ba3bbd3109a2 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -302,22 +302,14 @@ def test_interp_raise_on_only_mixed(self, axis): "E": [1, 2, 3, 4], } ) - msg = ( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) + msg = "DataFrame cannot interpolate with object dtype" with pytest.raises(TypeError, match=msg): df.astype("object").interpolate(axis=axis) def test_interp_raise_on_all_object_dtype(self): # GH 22985 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") - msg = ( - "Cannot interpolate with all object-dtype columns " - "in the DataFrame. Try setting at least one " - "column to a numeric dtype." - ) + msg = "DataFrame cannot interpolate with object dtype" with pytest.raises(TypeError, match=msg): df.interpolate() From 05f805c8eda3105ead228a311b18fdf7df950d3d Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 15 Mar 2024 14:33:49 +0100 Subject: [PATCH 8/8] simplify if condition --- pandas/core/generic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 611135f270fb4..ba1b5d5e1f040 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7846,9 +7846,7 @@ def interpolate( else: obj, should_transpose = (self.T, True) if axis == 1 else (self, False) # GH#53631 - if (obj.ndim == 1 and obj.dtype == object) or ( - obj.ndim == 2 and np.any(obj.dtypes == object) - ): + if np.any(obj.dtypes == object): raise TypeError( f"{type(self).__name__} cannot interpolate with object dtype." )