Skip to content

BUG: Bug in expressions evaluation with reversed ops, showing in series-dataframe ops (GH7198) #7201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 21, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ Bug Fixes
(:issue:`7178`)
- Bug in recognizing out-of-bounds positional list indexers with ``iloc`` and a multi-axis tuple indexer (:issue:`7189`)
- Bug in setitem with a single value, multi-index and integer indices (:issue:`7190`)
- Bug in expression evaluation with reversed ops, showing up in Series-DataFrame ops (:issue:`7198`, :issue:`7192`)

pandas 0.13.1
-------------
Expand Down
8 changes: 7 additions & 1 deletion pandas/computation/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,18 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):
return False


def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True,
def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, truediv=True, reversed=False,
**eval_kwargs):
result = None

if _can_use_numexpr(op, op_str, a, b, 'evaluate'):
try:

# we were originally called by a reversed op
# method
if reversed:
a,b = b,a

a_value = getattr(a, "values", a)
b_value = getattr(b, "values", b)
result = ne.evaluate('a_value %s b_value' % op_str,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2256,7 +2256,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
names.append(col.name)
elif isinstance(col, Index):
level = col
names.append(col.name)
names.append(col.name)
elif isinstance(col, (list, np.ndarray)):
level = col
names.append(None)
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,19 +68,21 @@ def names(x):
# not entirely sure why this is necessary, but previously was included
# so it's here to maintain compatibility
rmul=arith_method(operator.mul, names('rmul'), op('*'),
default_axis=default_axis),
default_axis=default_axis, reversed=True),
rsub=arith_method(lambda x, y: y - x, names('rsub'), op('-'),
default_axis=default_axis),
default_axis=default_axis, reversed=True),
rtruediv=arith_method(lambda x, y: operator.truediv(y, x),
names('rtruediv'), op('/'), truediv=True,
fill_zeros=np.inf, default_axis=default_axis),
fill_zeros=np.inf, default_axis=default_axis,
reversed=True),
rfloordiv=arith_method(lambda x, y: operator.floordiv(y, x),
names('rfloordiv'), op('//'),
default_axis=default_axis, fill_zeros=np.inf),
default_axis=default_axis, fill_zeros=np.inf,
reversed=True),
rpow=arith_method(lambda x, y: y ** x, names('rpow'), op('**'),
default_axis=default_axis),
default_axis=default_axis, reversed=True),
rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'),
default_axis=default_axis),
default_axis=default_axis, reversed=True),
)
new_methods['div'] = new_methods['truediv']
new_methods['rdiv'] = new_methods['rtruediv']
Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8351,6 +8351,47 @@ def test_combine_multiple_frames_dtypes(self):
expected = Series(dict( float64 = 2, float32 = 2 ))
assert_series_equal(results,expected)

def test_ops(self):

# tst ops and reversed ops in evaluation
# GH7198

# smaller hits python, larger hits numexpr
for n in [ 4, 4000 ]:

df = DataFrame(1,index=range(n),columns=list('abcd'))
df.iloc[0] = 2
m = df.mean()

for op_str, op, rop in [('+','__add__','__radd__'),
('-','__sub__','__rsub__'),
('*','__mul__','__rmul__'),
('/','__truediv__','__rtruediv__')]:

base = DataFrame(np.tile(m.values,n).reshape(n,-1),columns=list('abcd'))
expected = eval("base{op}df".format(op=op_str))

# ops as strings
result = eval("m{op}df".format(op=op_str))
assert_frame_equal(result,expected)

# these are commutative
if op in ['+','*']:
result = getattr(df,op)(m)
assert_frame_equal(result,expected)

# these are not
elif op in ['-','/']:
result = getattr(df,rop)(m)
assert_frame_equal(result,expected)

# GH7192
df = DataFrame(dict(A=np.random.randn(25000)))
df.iloc[0:5] = np.nan
expected = (1-np.isnan(df.iloc[0:25]))
result = (1-np.isnan(df)).iloc[0:25]
assert_frame_equal(result,expected)

def test_truncate(self):
offset = datetools.bday

Expand Down