From daccc5811388cc5a49adbf0bac8480150497911d Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Fri, 17 May 2024 13:27:23 +0200 Subject: [PATCH 1/6] updating df.query and df.eval docstrings. resolves #16283 --- pandas/core/frame.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f72a214f120a0..93093522e5d91 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4604,6 +4604,14 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No >>> df[df.B == df["C C"]] A B C C 0 1 10 10 + + Using local variable: + + >>> local_var = 2 + >>> df.query("A <= @local_var") + A B C C + 0 1 10 10 + 1 2 8 9 """ inplace = validate_bool_kwarg(inplace, "inplace") if not isinstance(expr, str): @@ -4644,6 +4652,20 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: ---------- expr : str The expression string to evaluate. + + You can refer to variables + in the environment by prefixing them with an '@' character like + ``@a + b``. + + You can refer to column names that are not valid Python variable names + by surrounding them in backticks. Thus, column names containing spaces + or punctuations (besides underscores) or starting with digits must be + surrounded by backticks. (For example, a column named "Area (cm^2)" would + be referenced as ```Area (cm^2)```). Column names which are Python keywords + (like "list", "for", "import", etc) cannot be used. + + For example, if one of your columns is called ``a a`` and you want + to sum it with ``b``, your query should be ```a a` + b``. inplace : bool, default False If the expression contains an assignment, whether to perform the operation inplace and mutate the existing DataFrame. Otherwise, @@ -4723,6 +4745,17 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: 2 3 6 9 -3 3 4 4 8 0 4 5 2 7 3 + + Local variables shall be explicitely referenced using ``@`` + character in front of the name: + + >>> local_var = 2 + >>> df.eval("@local_var * A") + 0 2 + 1 4 + 2 6 + 3 8 + 4 10 """ from pandas.core.computation.eval import eval as _eval From 0de174a4a66603d3158f07e1e7f385a8e5a47587 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Fri, 17 May 2024 14:56:12 +0200 Subject: [PATCH 2/6] typo --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 93093522e5d91..f160901b281ad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4746,7 +4746,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: 3 4 4 8 0 4 5 2 7 3 - Local variables shall be explicitely referenced using ``@`` + Local variables shall be explicitly referenced using ``@`` character in front of the name: >>> local_var = 2 From 5e0e65b25599e9b2581fe40533ebad7869f78ce5 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Tue, 21 May 2024 16:17:13 +0200 Subject: [PATCH 3/6] adding 1 example --- pandas/core/frame.py | 61 ++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f160901b281ad..a8db983f8e61e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4697,14 +4697,16 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: Examples -------- - >>> df = pd.DataFrame({"A": range(1, 6), "B": range(10, 0, -2)}) + >>> df = pd.DataFrame( + ... {"A": range(1, 6), "B": range(10, 0, -2), "C C": range(10, 5, -1)} + ... ) >>> df - A B - 0 1 10 - 1 2 8 - 2 3 6 - 3 4 4 - 4 5 2 + A B C C + 0 1 10 10 + 1 2 8 9 + 2 3 6 8 + 3 4 4 7 + 4 5 2 6 >>> df.eval("A + B") 0 11 1 10 @@ -4717,19 +4719,19 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: modified. >>> df.eval("C = A + B") - A B C - 0 1 10 11 - 1 2 8 10 - 2 3 6 9 - 3 4 4 8 - 4 5 2 7 + A B C C C + 0 1 10 10 11 + 1 2 8 9 10 + 2 3 6 8 9 + 3 4 4 7 8 + 4 5 2 6 7 >>> df - A B - 0 1 10 - 1 2 8 - 2 3 6 - 3 4 4 - 4 5 2 + A B C C + 0 1 10 10 + 1 2 8 9 + 2 3 6 8 + 3 4 4 7 + 4 5 2 6 Multiple columns can be assigned to using multi-line expressions: @@ -4739,12 +4741,21 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: ... D = A - B ... ''' ... ) - A B C D - 0 1 10 11 -9 - 1 2 8 10 -6 - 2 3 6 9 -3 - 3 4 4 8 0 - 4 5 2 7 3 + A B C C C D + 0 1 10 10 11 -9 + 1 2 8 9 10 -6 + 2 3 6 8 9 -3 + 3 4 4 7 8 0 + 4 5 2 6 7 3 + + For columns with spaces in their name, you can use backtick quoting. + + >>> df.eval("B * `C C`") + 0 100 + 1 72 + 2 48 + 3 28 + 4 12 Local variables shall be explicitly referenced using ``@`` character in front of the name: From 6e9e58f636acc470ab950da06fe1c0fe2f860c73 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Tue, 21 May 2024 23:36:15 +0200 Subject: [PATCH 4/6] changing wording following example added --- pandas/core/frame.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a8db983f8e61e..6f472303860dc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4657,15 +4657,8 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: in the environment by prefixing them with an '@' character like ``@a + b``. - You can refer to column names that are not valid Python variable names - by surrounding them in backticks. Thus, column names containing spaces - or punctuations (besides underscores) or starting with digits must be - surrounded by backticks. (For example, a column named "Area (cm^2)" would - be referenced as ```Area (cm^2)```). Column names which are Python keywords - (like "list", "for", "import", etc) cannot be used. - - For example, if one of your columns is called ``a a`` and you want - to sum it with ``b``, your query should be ```a a` + b``. + You can refer to column names that are not valid Python variable + names by surrounding them with backticks `````. inplace : bool, default False If the expression contains an assignment, whether to perform the operation inplace and mutate the existing DataFrame. Otherwise, From 2a176f3b3fac32ebbff0d75d381efe77be5bcb13 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Thu, 23 May 2024 22:42:36 +0200 Subject: [PATCH 5/6] updating 'C C' to 'C&C' for eval --- pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6f472303860dc..f613c436fc4ec 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4691,10 +4691,10 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: Examples -------- >>> df = pd.DataFrame( - ... {"A": range(1, 6), "B": range(10, 0, -2), "C C": range(10, 5, -1)} + ... {"A": range(1, 6), "B": range(10, 0, -2), "C&C": range(10, 5, -1)} ... ) >>> df - A B C C + A B C&C 0 1 10 10 1 2 8 9 2 3 6 8 @@ -4711,15 +4711,15 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: Assignment is allowed though by default the original DataFrame is not modified. - >>> df.eval("C = A + B") - A B C C C + >>> df.eval("D = A + B") + A B C&C D 0 1 10 10 11 1 2 8 9 10 2 3 6 8 9 3 4 4 7 8 4 5 2 6 7 >>> df - A B C C + A B C&C 0 1 10 10 1 2 8 9 2 3 6 8 @@ -4730,11 +4730,11 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: >>> df.eval( ... ''' - ... C = A + B - ... D = A - B + ... D = A + B + ... E = A - B ... ''' ... ) - A B C C C D + A B C&C D E 0 1 10 10 11 -9 1 2 8 9 10 -6 2 3 6 8 9 -3 @@ -4743,7 +4743,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: For columns with spaces in their name, you can use backtick quoting. - >>> df.eval("B * `C C`") + >>> df.eval("B * `C&C`") 0 100 1 72 2 48 From 22d2c63ab80b54f78ebe718a1a300239b0d98477 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Thu, 23 May 2024 22:46:34 +0200 Subject: [PATCH 6/6] updating 'C C' to 'C&C' for query --- pandas/core/frame.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f613c436fc4ec..ca9c16bd6e582 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4574,42 +4574,42 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No Examples -------- >>> df = pd.DataFrame( - ... {"A": range(1, 6), "B": range(10, 0, -2), "C C": range(10, 5, -1)} + ... {"A": range(1, 6), "B": range(10, 0, -2), "C&C": range(10, 5, -1)} ... ) >>> df - A B C C + A B C&C 0 1 10 10 1 2 8 9 2 3 6 8 3 4 4 7 4 5 2 6 >>> df.query("A > B") - A B C C + A B C&C 4 5 2 6 The previous expression is equivalent to >>> df[df.A > df.B] - A B C C + A B C&C 4 5 2 6 For columns with spaces in their name, you can use backtick quoting. - >>> df.query("B == `C C`") - A B C C + >>> df.query("B == `C&C`") + A B C&C 0 1 10 10 The previous expression is equivalent to - >>> df[df.B == df["C C"]] - A B C C + >>> df[df.B == df["C&C"]] + A B C&C 0 1 10 10 Using local variable: >>> local_var = 2 >>> df.query("A <= @local_var") - A B C C + A B C&C 0 1 10 10 1 2 8 9 """