From 2e41b35e37aca3f466b396e1ad4eaefca3f33aad Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 2 Jan 2025 19:03:34 -0500 Subject: [PATCH] BUG/TST (string dtype): raise proper TypeError in interpolate (#60637) * TST(string dtype): Resolve xfail for interpolate * Adjust arrow tests * Fixup for NumPyExtensionArray * Use tm.shares_memory (cherry picked from commit 5e50d3f3d2b0ee65f0d5bfda0c6da47ffd39dcfe) --- pandas/core/arrays/arrow/array.py | 3 +++ pandas/core/arrays/numpy_.py | 3 +++ pandas/tests/extension/test_arrow.py | 11 +++++++++++ pandas/tests/frame/methods/test_interpolate.py | 17 ++++++++++------- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0c1e1d0c63c85..00992ade4f160 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2150,6 +2150,9 @@ def interpolate( See NDFrame.interpolate.__doc__. """ # NB: we return type(self) even if copy=False + if not self.dtype._is_numeric: + raise TypeError(f"Cannot interpolate with {self.dtype} dtype") + mask = self.isna() if self.dtype.kind == "f": data = self._pa_array.to_numpy() diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 9f7238a97d808..07fa6254d87f3 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -287,6 +287,9 @@ def interpolate( See NDFrame.interpolate.__doc__. """ # NB: we return type(self) even if copy=False + if not self.dtype._is_numeric: + raise TypeError(f"Cannot interpolate with {self.dtype} dtype") + if not copy: out_data = self._ndarray else: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 470ca0673c60e..d524ed5a16828 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3350,6 +3350,17 @@ def test_string_to_datetime_parsing_cast(): tm.assert_series_equal(result, expected) +@pytest.mark.skipif( + pa_version_under13p0, reason="pairwise_diff_checked not implemented in pyarrow" +) +def test_interpolate_not_numeric(data): + if not data.dtype._is_numeric: + ser = pd.Series(data) + msg = re.escape(f"Cannot interpolate with {ser.dtype} dtype") + with pytest.raises(TypeError, match=msg): + pd.Series(data).interpolate() + + def test_string_to_time_parsing_cast(): # GH 56463 string_times = ["11:41:43.076160"] diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index bbb5e59e4a274..ebee19e3de20a 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -69,11 +69,7 @@ def test_interpolate_inplace(self, frame_or_series, using_array_manager, request assert np.shares_memory(orig, obj.values) assert orig.squeeze()[1] == 1.5 - # TODO(infer_string) raise proper TypeError in case of string dtype - @pytest.mark.xfail( - using_string_dtype(), reason="interpolate doesn't work for string" - ) - def test_interp_basic(self, using_copy_on_write): + def test_interp_basic(self, using_copy_on_write, using_infer_string): df = DataFrame( { "A": [1, 2, np.nan, 4], @@ -90,6 +86,13 @@ def test_interp_basic(self, using_copy_on_write): "D": list("abcd"), } ) + if using_infer_string: + dtype = "str" if using_infer_string else "object" + msg = f"[Cc]annot interpolate with {dtype} dtype" + with pytest.raises(TypeError, match=msg): + df.interpolate() + return + msg = "DataFrame.interpolate with object dtype" with tm.assert_produces_warning(FutureWarning, match=msg): result = df.interpolate() @@ -111,8 +114,8 @@ def test_interp_basic(self, using_copy_on_write): tm.assert_frame_equal(df, expected) # check we DID operate inplace - assert np.shares_memory(df["C"]._values, cvalues) - assert np.shares_memory(df["D"]._values, dvalues) + assert tm.shares_memory(df["C"]._values, cvalues) + assert tm.shares_memory(df["D"]._values, dvalues) @pytest.mark.xfail( using_string_dtype(), reason="interpolate doesn't work for string"