From 5a08868eec5932695438439d9d9d996e34dcd063 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 20 Jun 2023 22:15:23 +0200 Subject: [PATCH 1/5] CoW: Add lazy copy to eval --- doc/source/user_guide/copy_on_write.rst | 1 + doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/computation/eval.py | 4 +++- pandas/tests/copy_view/test_methods.py | 27 +++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/copy_on_write.rst b/doc/source/user_guide/copy_on_write.rst index e2e7dfa42d115..59bdb1926895f 100644 --- a/doc/source/user_guide/copy_on_write.rst +++ b/doc/source/user_guide/copy_on_write.rst @@ -211,6 +211,7 @@ following methods: - :meth:`DataFrame.astype` / :meth:`Series.astype` - :meth:`DataFrame.convert_dtypes` / :meth:`Series.convert_dtypes` - :meth:`DataFrame.join` + - :meth:`DataFrame.eval` - :func:`concat` - :func:`merge` diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 44691e4265f5b..313f134958c28 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -24,6 +24,7 @@ Copy-on-Write improvements - The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary of Index objects and specifying ``copy=False``, will now use a lazy copy of those Index objects for the columns of the DataFrame (:issue:`52947`) +- Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`) .. _whatsnew_210.enhancements.enhancement2: diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index d19730a321b36..3f076364fe3f5 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -7,6 +7,8 @@ from typing import TYPE_CHECKING import warnings +from pandas._config import using_copy_on_write + from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg @@ -373,7 +375,7 @@ def eval( # if returning a copy, copy only on the first assignment if not inplace and first_expr: try: - target = env.target.copy() + target = env.target.copy(deep=not using_copy_on_write()) except AttributeError as err: raise ValueError("Cannot return a copy of the target") from err else: diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 3af47d0b37338..9807df07033f9 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1803,3 +1803,30 @@ def test_insert_series(using_copy_on_write): df.iloc[0, 1] = 100 tm.assert_series_equal(ser, ser_orig) + + +def test_eval(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1}) + df_orig = df.copy() + + result = df.eval("c = a+b") + if using_copy_on_write: + assert np.shares_memory(get_array(df, "a"), get_array(result, "a")) + else: + assert not np.shares_memory(get_array(df, "a"), get_array(result, "a")) + + result.iloc[0, 0] = 100 + tm.assert_frame_equal(df, df_orig) + + +def test_eval_inplace(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1}) + df_orig = df.copy() + df_view = df[:] + + df.eval("c = a+b", inplace=True) + assert np.shares_memory(get_array(df, "a"), get_array(df_view, "a")) + + df.iloc[0, 0] = 100 + if using_copy_on_write: + tm.assert_frame_equal(df_view, df_orig) From 6c897ab1ea51f18b8289d173b73de512a790f0b9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 20 Jun 2023 22:47:56 +0200 Subject: [PATCH 2/5] Fix --- pandas/core/computation/eval.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 3f076364fe3f5..eabe25d7f0db2 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -375,7 +375,11 @@ def eval( # if returning a copy, copy only on the first assignment if not inplace and first_expr: try: - target = env.target.copy(deep=not using_copy_on_write()) + target = env.target + if isinstance(target, NDFrame): + target = target.copy(deep=not using_copy_on_write()) + else: + target = target.copy() except AttributeError as err: raise ValueError("Cannot return a copy of the target") from err else: From 550cf3d8487ff1e44d6586198e7c991018906549 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:16:49 +0200 Subject: [PATCH 3/5] Use None --- pandas/core/computation/eval.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index eabe25d7f0db2..7012e50c758b5 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -7,8 +7,6 @@ from typing import TYPE_CHECKING import warnings -from pandas._config import using_copy_on_write - from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg @@ -377,7 +375,7 @@ def eval( try: target = env.target if isinstance(target, NDFrame): - target = target.copy(deep=not using_copy_on_write()) + target = target.copy(deep=None) else: target = target.copy() except AttributeError as err: From b8d0a33fd4451adde4ecfcfcc5c361ef871256d5 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 29 Jun 2023 00:34:24 +0200 Subject: [PATCH 4/5] Ignore pyright --- pandas/core/computation/eval.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 7012e50c758b5..187de68fc1aa9 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -393,7 +393,9 @@ def eval( if inplace and isinstance(target, NDFrame): target.loc[:, assigner] = ret else: - target[assigner] = ret + target[ + assigner + ] = ret # pyright: ignore[reportGeneralTypeIssues] except (TypeError, IndexError) as err: raise ValueError("Cannot assign expression output to target") from err From 7fa0d5d1ffe485f94075a18ed965f804b4990a81 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 29 Jun 2023 10:10:15 +0200 Subject: [PATCH 5/5] Fix --- pandas/core/computation/eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 187de68fc1aa9..ce0c50a810ab1 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -393,9 +393,9 @@ def eval( if inplace and isinstance(target, NDFrame): target.loc[:, assigner] = ret else: - target[ + target[ # pyright: ignore[reportGeneralTypeIssues] assigner - ] = ret # pyright: ignore[reportGeneralTypeIssues] + ] = ret except (TypeError, IndexError) as err: raise ValueError("Cannot assign expression output to target") from err