Skip to content

Commit 593a815

Browse files
domsmrz authored and mroeschke committed
pd.eval: Series names are now preserved even for "numexpr" engine. (pandas-dev#58437)
* Eval: Series names are preserved for numexpr

  Series names are now preserved even when using the numexpr engine, making the behavior consistent with the python engine.

* Update doc/source/whatsnew/v3.0.0.rst

  Co-authored-by: Matthew Roeschke <[email protected]>

* Update pandas/core/computation/align.py

  Co-authored-by: Matthew Roeschke <[email protected]>

* Update pandas/tests/computation/test_eval.py

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 1feafb5 commit 593a815

File tree

4 files changed

+56
-33
lines changed

4 files changed

+56
-33
lines changed

pandas/core/computation/align.py

+13-6
Original file line number | Diff line number | Diff line change
@@ -160,19 +160,24 @@ def align_terms(terms):
160160
# can't iterate so it must just be a constant or single variable
161161
if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
162162
typ = type(terms.value)
163-
return typ, _zip_axes_from_type(typ, terms.value.axes)
164-
return np.result_type(terms.type), None
163+
name = terms.value.name if isinstance(terms.value, ABCSeries) else None
164+
return typ, _zip_axes_from_type(typ, terms.value.axes), name
165+
return np.result_type(terms.type), None, None
165166

166167
# if all resolved variables are numeric scalars
167168
if all(term.is_scalar for term in terms):
168-
return result_type_many(*(term.value for term in terms)).type, None
169+
return result_type_many(*(term.value for term in terms)).type, None, None
170+
171+
# if all input series have a common name, propagate it to the returned series
172+
names = {term.value.name for term in terms if isinstance(term.value, ABCSeries)}
173+
name = names.pop() if len(names) == 1 else None
169174

170175
# perform the main alignment
171176
typ, axes = _align_core(terms)
172-
return typ, axes
177+
return typ, axes, name
173178

174179

175-
def reconstruct_object(typ, obj, axes, dtype):
180+
def reconstruct_object(typ, obj, axes, dtype, name):
176181
"""
177182
Reconstruct an object given its type, raw value, and possibly empty
178183
(None) axes.
@@ -200,7 +205,9 @@ def reconstruct_object(typ, obj, axes, dtype):
200205
res_t = np.result_type(obj.dtype, dtype)
201206

202207
if not isinstance(typ, partial) and issubclass(typ, PandasObject):
203-
return typ(obj, dtype=res_t, **axes)
208+
if name is None:
209+
return typ(obj, dtype=res_t, **axes)
210+
return typ(obj, dtype=res_t, name=name, **axes)
204211

205212
# special case for pathological things like ~True/~False
206213
if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:

pandas/core/computation/engines.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@ def __init__(self, expr) -> None:
5454
self.expr = expr
5555
self.aligned_axes = None
5656
self.result_type = None
57+
self.result_name = None
5758

5859
def convert(self) -> str:
5960
"""
@@ -76,12 +77,18 @@ def evaluate(self) -> object:
7677
The result of the passed expression.
7778
"""
7879
if not self._is_aligned:
79-
self.result_type, self.aligned_axes = align_terms(self.expr.terms)
80+
self.result_type, self.aligned_axes, self.result_name = align_terms(
81+
self.expr.terms
82+
)
8083

8184
# make sure no names in resolvers and locals/globals clash
8285
res = self._evaluate()
8386
return reconstruct_object(
84-
self.result_type, res, self.aligned_axes, self.expr.terms.return_type
87+
self.result_type,
88+
res,
89+
self.aligned_axes,
90+
self.expr.terms.return_type,
91+
self.result_name,
8592
)
8693

8794
@property

pandas/tests/computation/test_eval.py

+24-19
Original file line number | Diff line number | Diff line change
@@ -737,6 +737,17 @@ def test_and_logic_string_match(self):
737737
assert pd.eval(f"{event.str.match('hello').a}")
738738
assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}")
739739

740+
def test_eval_keep_name(self, engine, parser):
741+
df = Series([2, 15, 28], name="a").to_frame()
742+
res = df.eval("a + a", engine=engine, parser=parser)
743+
expected = Series([4, 30, 56], name="a")
744+
tm.assert_series_equal(expected, res)
745+
746+
def test_eval_unmatching_names(self, engine, parser):
747+
variable_name = Series([42], name="series_name")
748+
res = pd.eval("variable_name + 0", engine=engine, parser=parser)
749+
tm.assert_series_equal(variable_name, res)
750+
740751

741752
# -------------------------------------
742753
# gh-12388: Typecasting rules consistency with python
@@ -1269,14 +1280,12 @@ def test_assignment_explicit(self):
12691280
expected["c"] = expected["a"] + expected["b"]
12701281
tm.assert_frame_equal(df, expected)
12711282

1272-
def test_column_in(self):
1283+
def test_column_in(self, engine):
12731284
# GH 11235
12741285
df = DataFrame({"a": [11], "b": [-32]})
1275-
result = df.eval("a in [11, -32]")
1276-
expected = Series([True])
1277-
# TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI
1278-
# but cannot reproduce locally
1279-
tm.assert_series_equal(result, expected, check_names=False)
1286+
result = df.eval("a in [11, -32]", engine=engine)
1287+
expected = Series([True], name="a")
1288+
tm.assert_series_equal(result, expected)
12801289

12811290
@pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.")
12821291
def test_assignment_not_inplace(self):
@@ -1505,7 +1514,7 @@ def test_date_boolean(self, engine, parser):
15051514
parser=parser,
15061515
)
15071516
expec = df.dates1 < "20130101"
1508-
tm.assert_series_equal(res, expec, check_names=False)
1517+
tm.assert_series_equal(res, expec)
15091518

15101519
def test_simple_in_ops(self, engine, parser):
15111520
if parser != "python":
@@ -1620,7 +1629,7 @@ def test_unary_functions(self, fn, engine, parser):
16201629
got = self.eval(expr, engine=engine, parser=parser)
16211630
with np.errstate(all="ignore"):
16221631
expect = getattr(np, fn)(a)
1623-
tm.assert_series_equal(got, expect, check_names=False)
1632+
tm.assert_series_equal(got, expect)
16241633

16251634
@pytest.mark.parametrize("fn", _binary_math_ops)
16261635
def test_binary_functions(self, fn, engine, parser):
@@ -1637,7 +1646,7 @@ def test_binary_functions(self, fn, engine, parser):
16371646
got = self.eval(expr, engine=engine, parser=parser)
16381647
with np.errstate(all="ignore"):
16391648
expect = getattr(np, fn)(a, b)
1640-
tm.assert_almost_equal(got, expect, check_names=False)
1649+
tm.assert_almost_equal(got, expect)
16411650

16421651
def test_df_use_case(self, engine, parser):
16431652
df = DataFrame(
@@ -1653,8 +1662,8 @@ def test_df_use_case(self, engine, parser):
16531662
inplace=True,
16541663
)
16551664
got = df.e
1656-
expect = np.arctan2(np.sin(df.a), df.b)
1657-
tm.assert_series_equal(got, expect, check_names=False)
1665+
expect = np.arctan2(np.sin(df.a), df.b).rename("e")
1666+
tm.assert_series_equal(got, expect)
16581667

16591668
def test_df_arithmetic_subexpression(self, engine, parser):
16601669
df = DataFrame(
@@ -1665,8 +1674,8 @@ def test_df_arithmetic_subexpression(self, engine, parser):
16651674
)
16661675
df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True)
16671676
got = df.e
1668-
expect = np.sin(df.a + df.b)
1669-
tm.assert_series_equal(got, expect, check_names=False)
1677+
expect = np.sin(df.a + df.b).rename("e")
1678+
tm.assert_series_equal(got, expect)
16701679

16711680
@pytest.mark.parametrize(
16721681
"dtype, expect_dtype",
@@ -1690,10 +1699,10 @@ def test_result_types(self, dtype, expect_dtype, engine, parser):
16901699
assert df.a.dtype == dtype
16911700
df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True)
16921701
got = df.b
1693-
expect = np.sin(df.a)
1702+
expect = np.sin(df.a).rename("b")
16941703
assert expect.dtype == got.dtype
16951704
assert expect_dtype == got.dtype
1696-
tm.assert_series_equal(got, expect, check_names=False)
1705+
tm.assert_series_equal(got, expect)
16971706

16981707
def test_undefined_func(self, engine, parser):
16991708
df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
@@ -1898,10 +1907,6 @@ def test_equals_various(other):
18981907
df = DataFrame({"A": ["a", "b", "c"]}, dtype=object)
18991908
result = df.eval(f"A == {other}")
19001909
expected = Series([False, False, False], name="A")
1901-
if USE_NUMEXPR:
1902-
# https://github.com/pandas-dev/pandas/issues/10239
1903-
# lose name with numexpr engine. Remove when that's fixed.
1904-
expected.name = None
19051910
tm.assert_series_equal(result, expected)
19061911

19071912

pandas/tests/frame/test_query_eval.py

+10-6
Original file line number | Diff line number | Diff line change
@@ -58,26 +58,26 @@ def test_query_default(self, df, expected1, expected2):
5858
result = df.query("A>0")
5959
tm.assert_frame_equal(result, expected1)
6060
result = df.eval("A+1")
61-
tm.assert_series_equal(result, expected2, check_names=False)
61+
tm.assert_series_equal(result, expected2)
6262

6363
def test_query_None(self, df, expected1, expected2):
6464
result = df.query("A>0", engine=None)
6565
tm.assert_frame_equal(result, expected1)
6666
result = df.eval("A+1", engine=None)
67-
tm.assert_series_equal(result, expected2, check_names=False)
67+
tm.assert_series_equal(result, expected2)
6868

6969
def test_query_python(self, df, expected1, expected2):
7070
result = df.query("A>0", engine="python")
7171
tm.assert_frame_equal(result, expected1)
7272
result = df.eval("A+1", engine="python")
73-
tm.assert_series_equal(result, expected2, check_names=False)
73+
tm.assert_series_equal(result, expected2)
7474

7575
def test_query_numexpr(self, df, expected1, expected2):
7676
if NUMEXPR_INSTALLED:
7777
result = df.query("A>0", engine="numexpr")
7878
tm.assert_frame_equal(result, expected1)
7979
result = df.eval("A+1", engine="numexpr")
80-
tm.assert_series_equal(result, expected2, check_names=False)
80+
tm.assert_series_equal(result, expected2)
8181
else:
8282
msg = (
8383
r"'numexpr' is not installed or an unsupported version. "
@@ -194,8 +194,12 @@ def test_using_numpy(self, engine, parser):
194194
df = Series([0.2, 1.5, 2.8], name="a").to_frame()
195195
res = df.eval("@np.floor(a)", engine=engine, parser=parser)
196196
expected = np.floor(df["a"])
197-
if engine == "numexpr":
198-
expected.name = None # See GH 58069
197+
tm.assert_series_equal(expected, res)
198+
199+
def test_eval_simple(self, engine, parser):
200+
df = Series([0.2, 1.5, 2.8], name="a").to_frame()
201+
res = df.eval("a", engine=engine, parser=parser)
202+
expected = df["a"]
199203
tm.assert_series_equal(expected, res)
200204

201205

0 commit comments

Comments
 (0)