From be587d5443a8d66f011eecdbe38cff35766fafd7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 00:18:27 +0100 Subject: [PATCH 1/4] CoW: Fix warnings for eval --- pandas/core/computation/eval.py | 15 ++++----------- pandas/core/generic.py | 5 ++++- pandas/tests/computation/test_eval.py | 12 ++++-------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 0c99e5e7bdc54..f1fe528de06f8 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -388,17 +388,10 @@ def eval( # we will ignore numpy warnings here; e.g. if trying # to use a non-numeric indexer try: - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "always", "Setting a value on a view", FutureWarning - ) - # TODO: Filter the warnings we actually care about here. - if inplace and isinstance(target, NDFrame): - target.loc[:, assigner] = ret - else: - target[ # pyright: ignore[reportGeneralTypeIssues] - assigner - ] = ret + if inplace and isinstance(target, NDFrame): + target.loc[:, assigner] = ret + else: + target[assigner] = ret # pyright: ignore[reportGeneralTypeIssues] except (TypeError, IndexError) as err: raise ValueError("Cannot assign expression output to target") from err diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e4e95a973a3c1..a66269cbf3f8f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -637,12 +637,15 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: Used in :meth:`DataFrame.eval`. """ from pandas.core.computation.parsing import clean_column_name + from pandas.core.series import Series if isinstance(self, ABCSeries): return {clean_column_name(self.name): self} return { - clean_column_name(k): v for k, v in self.items() if not isinstance(k, int) + clean_column_name(k): Series(v._values, copy=False, index=v.index) + for k, v in self.items() + if not isinstance(k, int) } @final diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index fe49446424de1..bc46de43d765b 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1164,9 +1164,7 @@ def test_assignment_single_assign_new(self): df.eval("c = a + b", inplace=True) tm.assert_frame_equal(df, expected) - # TODO(CoW-warn) this should not warn (DataFrame.eval creates refs to self) - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_assignment_single_assign_local_overlap(self, warn_copy_on_write): + def test_assignment_single_assign_local_overlap(self): df = DataFrame( np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) @@ -1220,8 +1218,6 @@ def test_column_in(self): tm.assert_series_equal(result, expected, check_names=False) @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.") - # TODO(CoW-warn) this should not warn (DataFrame.eval creates refs to self) - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_assignment_not_inplace(self): # see gh-9297 df = DataFrame( @@ -1235,7 +1231,7 @@ def test_assignment_not_inplace(self): expected["c"] = expected["a"] + expected["b"] tm.assert_frame_equal(df, expected) - def test_multi_line_expression(self): + def test_multi_line_expression(self, warn_copy_on_write): # GH 11149 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) expected = df.copy() @@ -1908,8 +1904,8 @@ def test_set_inplace(using_copy_on_write, warn_copy_on_write): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) result_view = df[:] ser = df["A"] - # with tm.assert_cow_warning(warn_copy_on_write): - df.eval("A = B + C", inplace=True) + with tm.assert_cow_warning(warn_copy_on_write): + df.eval("A = B + C", inplace=True) expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]}) tm.assert_frame_equal(df, expected) if not using_copy_on_write: From b92bd60f8ab8185e250f10ddd522cd71118f349a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 00:56:04 +0100 Subject: [PATCH 2/4] Fixup --- pandas/core/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a66269cbf3f8f..cd053b70d8ef9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -643,7 +643,9 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: return {clean_column_name(self.name): self} return { - clean_column_name(k): Series(v._values, copy=False, index=v.index) + clean_column_name(k): Series( + v._values, copy=False, index=v.index + ).__finalize__(v) for k, v in self.items() if not isinstance(k, int) } From e396b1dce31909b0e909ee3c96a275a1df014750 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 23:51:04 +0100 Subject: [PATCH 3/4] Update --- pandas/core/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cd053b70d8ef9..48708b1569438 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -643,11 +643,11 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: return {clean_column_name(self.name): self} return { - clean_column_name(k): Series( - v._values, copy=False, index=v.index - ).__finalize__(v) - for k, v in self.items() - if not isinstance(k, int) + clean_column_name(self.columns[i]): Series( + v, copy=False, index=self.index + ).__finalize__(self) + for i, v in enumerate(self._iter_column_arrays()) + if not isinstance(self.columns[i], int) } @final From 006f42559aee73e2ebdaf8a10f172007bacb44d4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Dec 2023 10:49:18 +0100 Subject: [PATCH 4/4] fix column name --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8bd379b53dd51..eaae515c4d7d5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -656,11 +656,11 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: return {clean_column_name(self.name): self} return { - clean_column_name(self.columns[i]): Series( - v, copy=False, index=self.index + clean_column_name(k): Series( + v, copy=False, index=self.index, name=k ).__finalize__(self) - for i, v in enumerate(self._iter_column_arrays()) - if not isinstance(self.columns[i], int) + for k, v in zip(self.columns, self._iter_column_arrays()) + if not isinstance(k, int) } @final