Skip to content

BUG: disallow mixed dtype operations in eval/query #6652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ API Changes
- A tuple passed to ``DataFrame.sort_index`` will be interpreted as the levels of
the index, rather than requiring a list of tuples (:issue:`4370`)

- Fix a bug where invalid eval/query operations would blow the stack (:issue:`5198`)

Deprecations
~~~~~~~~~~~~

Expand Down
5 changes: 5 additions & 0 deletions pandas/computation/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,11 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
'<=', '>=')):
res = op(lhs, rhs)

if res.has_invalid_return_type:
raise TypeError("unsupported operand type(s) for {0}:"
" '{1}' and '{2}'".format(res.op, lhs.type,
rhs.type))

if self.engine != 'pytables':
if (res.op in _cmp_ops_syms
and getattr(lhs, 'is_datetime', False)
Expand Down
22 changes: 21 additions & 1 deletion pandas/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def name(self):

class Op(StringMixin):

"""Hold an operator of unknown arity
"""Hold an operator of arbitrary arity
"""

def __init__(self, op, operands, *args, **kwargs):
Expand All @@ -195,6 +195,16 @@ def return_type(self):
return np.bool_
return _result_type_many(*(term.type for term in com.flatten(self)))

@property
def has_invalid_return_type(self):
    """Report whether this op mixes object and non-object operand types.

    Returns
    -------
    bool
        ``True`` when ``self.return_type`` compares equal to ``object``
        and ``self.operand_types`` holds at least one entry other than
        ``np.dtype('object')``; ``False`` otherwise.
    """
    types = self.operand_types
    obj_dtype_set = frozenset([np.dtype('object')])
    # Coerce to a real boolean: a bare ``and`` would hand callers the
    # leftover frozenset of non-object types instead of True/False.
    return bool(self.return_type == object and types - obj_dtype_set)

@property
def operand_types(self):
    """Collect the distinct ``type`` attributes of the flattened terms.

    Iterates ``com.flatten(self)`` and returns the ``type`` of every
    yielded term as a ``frozenset``.
    """
    seen = set()
    for node in com.flatten(self):
        seen.add(node.type)
    return frozenset(seen)

@property
def isscalar(self):
    """Return True only if every entry of ``self.operands`` is scalar."""
    for child in self.operands:
        if not child.isscalar:
            return False
    return True
Expand Down Expand Up @@ -412,6 +422,10 @@ def _disallow_scalar_only_bool_ops(self):
raise NotImplementedError("cannot evaluate scalar only bool ops")


def isnumeric(dtype):
    """Return whether ``dtype`` resolves to a NumPy numeric scalar type.

    ``dtype`` is converted with ``np.dtype`` and the resulting dtype's
    scalar type is checked for being a subclass of ``np.number``.
    """
    scalar_type = np.dtype(dtype).type
    return issubclass(scalar_type, np.number)


class Div(BinOp):

"""Div operator to special case casting.
Expand All @@ -428,6 +442,12 @@ class Div(BinOp):
def __init__(self, lhs, rhs, truediv, *args, **kwargs):
super(Div, self).__init__('/', lhs, rhs, *args, **kwargs)

if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
raise TypeError("unsupported operand type(s) for {0}:"
" '{1}' and '{2}'".format(self.op,
lhs.return_type,
rhs.return_type))

if truediv or PY3:
_cast_inplace(com.flatten(self), np.float_)

Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -13243,6 +13243,16 @@ def test_bool_arith_expr(self):
expect = self.frame.a[self.frame.a < 1] + self.frame.b
assert_series_equal(res, expect)

def test_invalid_type_for_operator_raises(self):
    """Check that int-vs-string arithmetic in ``eval`` raises TypeError.

    Evaluates ``a <op> b`` for each of ``+ - * /`` on a frame whose
    ``a`` column holds ints and whose ``b`` column holds strings, using
    the engine and parser configured on the test class.
    """
    df = DataFrame({'a': [1, 2], 'b': ['c', 'd']})
    ops = '+', '-', '*', '/'
    # Raw strings: the pattern contains regex escapes (``\(``) that are
    # invalid escape sequences in an ordinary string literal.
    msg = (r"unsupported operand type\(s\) for "
           r".+: '.+' and '.+'")
    for op in ops:
        with tm.assertRaisesRegexp(TypeError, msg):
            df.eval('a {0} b'.format(op), engine=self.engine,
                    parser=self.parser)


class TestDataFrameEvalNumExprPython(TestDataFrameEvalNumExprPandas):

Expand Down