From 8bdf093e761c5849571802e0155f564a51506068 Mon Sep 17 00:00:00 2001
From: jreback
Date: Thu, 21 Nov 2013 09:17:55 -0500
Subject: [PATCH] TST/API/BUG: resolve scoping issues in pytables query where
 rhs is a compound selection or scoped variables, e.g. 'l1=selection.index'
 and 'l1=l' (where l = selection.index), were failing

---
 pandas/computation/expr.py       | 28 ++++++-----
 pandas/computation/pytables.py   | 18 +++++++-
 pandas/io/pytables.py            |  4 ++
 pandas/io/tests/test_pytables.py | 79 ++++++++++++++++++++++++++++++--
 4 files changed, 110 insertions(+), 19 deletions(-)

diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py
index 4c6ea3ecdae7d..bcb5570eae1fc 100644
--- a/pandas/computation/expr.py
+++ b/pandas/computation/expr.py
@@ -125,6 +125,10 @@ def __init__(self, gbls=None, lcls=None, level=1, resolvers=None,
         self.globals['True'] = True
         self.globals['False'] = False
 
+        # function defs
+        self.globals['list'] = list
+        self.globals['tuple'] = tuple
+
         res_keys = (list(o.keys()) for o in self.resolvers)
         self.resolver_keys = frozenset(reduce(operator.add, res_keys, []))
         self._global_resolvers = self.resolvers + (self.locals, self.globals)
@@ -505,21 +509,21 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
                                     maybe_eval_in_python=('==', '!=')):
         res = op(lhs, rhs)
 
-        if (res.op in _cmp_ops_syms and lhs.is_datetime or rhs.is_datetime and
-                self.engine != 'pytables'):
-            # all date ops must be done in python bc numexpr doesn't work well
-            # with NaT
-            return self._possibly_eval(res, self.binary_ops)
+        if self.engine != 'pytables':
+            if (res.op in _cmp_ops_syms and getattr(lhs,'is_datetime',False) or getattr(rhs,'is_datetime',False)):
+                # all date ops must be done in python bc numexpr doesn't work well
+                # with NaT
+                return self._possibly_eval(res, self.binary_ops)
 
         if res.op in eval_in_python:
             # "in"/"not in" ops are always evaluated in python
             return self._possibly_eval(res, eval_in_python)
-        elif (lhs.return_type == object or rhs.return_type == object and
-                self.engine != 'pytables'):
-            # evaluate "==" and "!=" in python if either of our operands has an
-            # object return type
-            return self._possibly_eval(res, eval_in_python +
-                                       maybe_eval_in_python)
+        elif self.engine != 'pytables':
+            if (getattr(lhs,'return_type',None) == object or getattr(rhs,'return_type',None) == object):
+                # evaluate "==" and "!=" in python if either of our operands has an
+                # object return type
+                return self._possibly_eval(res, eval_in_python +
+                                           maybe_eval_in_python)
         return res
 
     def visit_BinOp(self, node, **kwargs):
@@ -635,7 +639,7 @@ def visit_Attribute(self, node, **kwargs):
 
         raise ValueError("Invalid Attribute context {0}".format(ctx.__name__))
 
-    def visit_Call(self, node, **kwargs):
+    def visit_Call(self, node, side=None, **kwargs):
 
         # this can happen with: datetime.datetime
         if isinstance(node.func, ast.Attribute):
diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py
index a521bfb3cfec9..7716bc0051159 100644
--- a/pandas/computation/pytables.py
+++ b/pandas/computation/pytables.py
@@ -401,8 +401,15 @@ def visit_Assign(self, node, **kwargs):
         return self.visit(cmpr)
 
     def visit_Subscript(self, node, **kwargs):
+        # only allow simple subscripts
+
         value = self.visit(node.value)
         slobj = self.visit(node.slice)
+        try:
+            value = value.value
+        except:
+            pass
+
         try:
             return self.const_type(value[slobj], self.env)
         except TypeError:
@@ -416,9 +423,16 @@ def visit_Attribute(self, node, **kwargs):
         ctx = node.ctx.__class__
         if ctx == ast.Load:
             # resolve the value
-            resolved = self.visit(value).value
+            resolved = self.visit(value)
+
+            # try to get the value to see if we are another expression
+            try:
+                resolved = resolved.value
+            except (AttributeError):
+                pass
+
             try:
-                return getattr(resolved, attr)
+                return self.term_type(getattr(resolved, attr), self.env)
             except AttributeError:
 
                 # something like datetime.datetime where scope is overriden
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 49f60a7051ba9..d2fe1e0638192 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -294,6 +294,10 @@ def read_hdf(path_or_buf, key, **kwargs):
 
     """
 
+    # grab the scope
+    if 'where' in kwargs:
+        kwargs['where'] = _ensure_term(kwargs['where'])
+
     f = lambda store, auto_close: store.select(
         key, auto_close=auto_close, **kwargs)
 
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 759f63a962e57..ba69f7a834dad 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -81,14 +81,19 @@ def ensure_clean_store(path, mode='a', complevel=None, complib=None,
 def ensure_clean_path(path):
     """
     return essentially a named temporary file that is not opened
-    and deleted on existing
+    and deleted on exiting; if path is a list, then create and
+    return list of filenames
     """
-    try:
-        filename = create_tempfile(path)
-        yield filename
+    if isinstance(path, list):
+        filenames = [ create_tempfile(p) for p in path ]
+        yield filenames
+    else:
+        filenames = [ create_tempfile(path) ]
+        yield filenames[0]
     finally:
-        safe_remove(filename)
+        for f in filenames:
+            safe_remove(f)
 
 
 # set these parameters so we don't have file sharing
 tables.parameters.MAX_NUMEXPR_THREADS = 1
@@ -3124,6 +3129,70 @@ def test_frame_select_complex(self):
         expected = df.loc[df.index>df.index[3]].reindex(columns=['A','B'])
         tm.assert_frame_equal(result, expected)
 
+    def test_frame_select_complex2(self):
+
+        with ensure_clean_path(['parms.hdf','hist.hdf']) as paths:
+
+            pp, hh = paths
+
+            # use non-trivial selection criteria
+            parms = DataFrame({ 'A' : [1,1,2,2,3] })
+            parms.to_hdf(pp,'df',mode='w',format='table',data_columns=['A'])
+
+            selection = read_hdf(pp,'df',where='A=[2,3]')
+            hist = DataFrame(np.random.randn(25,1),columns=['data'],
+                             index=MultiIndex.from_tuples([ (i,j) for i in range(5) for j in range(5) ],
+                                                          names=['l1','l2']))
+
+            hist.to_hdf(hh,'df',mode='w',format='table')
+
+            expected = read_hdf(hh,'df',where=Term('l1','=',[2,3,4]))
+
+            # list like
+            result = read_hdf(hh,'df',where=Term('l1','=',selection.index.tolist()))
+            assert_frame_equal(result, expected)
+            l = selection.index.tolist()
+
+            # scope with list like
+            store = HDFStore(hh)
+            result = store.select('df',where='l1=l')
+            assert_frame_equal(result, expected)
+            store.close()
+
+            result = read_hdf(hh,'df',where='l1=l')
+            assert_frame_equal(result, expected)
+
+            # index
+            index = selection.index
+            result = read_hdf(hh,'df',where='l1=index')
+            assert_frame_equal(result, expected)
+
+            result = read_hdf(hh,'df',where='l1=selection.index')
+            assert_frame_equal(result, expected)
+
+            result = read_hdf(hh,'df',where='l1=selection.index.tolist()')
+            assert_frame_equal(result, expected)
+
+            result = read_hdf(hh,'df',where='l1=list(selection.index)')
+            assert_frame_equal(result, expected)
+
+            # scope with index
+            store = HDFStore(hh)
+
+            result = store.select('df',where='l1=index')
+            assert_frame_equal(result, expected)
+
+            result = store.select('df',where='l1=selection.index')
+            assert_frame_equal(result, expected)
+
+            result = store.select('df',where='l1=selection.index.tolist()')
+            assert_frame_equal(result, expected)
+
+            result = store.select('df',where='l1=list(selection.index)')
+            assert_frame_equal(result, expected)
+
+            store.close()
+
     def test_invalid_filtering(self):
 
         # can't use more than one filter (atm)
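
The new test above doubles as a usage recipe. The following is a minimal usage sketch of the behaviour this patch enables, not part of the patch itself: it assumes a pandas build containing this commit plus PyTables, and the file names 'parms.hdf' and 'hist.hdf' are placeholders written to the current directory.

    import numpy as np
    import pandas as pd

    # a small parameters table with a queryable data column
    parms = pd.DataFrame({'A': [1, 1, 2, 2, 3]})
    parms.to_hdf('parms.hdf', 'df', mode='w', format='table', data_columns=['A'])
    selection = pd.read_hdf('parms.hdf', 'df', where='A=[2,3]')

    # a larger table whose MultiIndex level 'l1' we want to query
    hist = pd.DataFrame(np.random.randn(25, 1), columns=['data'],
                        index=pd.MultiIndex.from_tuples(
                            [(i, j) for i in range(5) for j in range(5)],
                            names=['l1', 'l2']))
    hist.to_hdf('hist.hdf', 'df', mode='w', format='table')

    # a plain local variable on the rhs of the where clause is resolved in scope
    l = selection.index.tolist()
    res1 = pd.read_hdf('hist.hdf', 'df', where='l1=l')

    # compound selections: attribute access, a method call, a list() call
    res2 = pd.read_hdf('hist.hdf', 'df', where='l1=selection.index')
    res3 = pd.read_hdf('hist.hdf', 'df', where='l1=selection.index.tolist()')
    res4 = pd.read_hdf('hist.hdf', 'df', where='l1=list(selection.index)')

    # the same where strings work through HDFStore.select
    store = pd.HDFStore('hist.hdf')
    res5 = store.select('df', where='l1=selection.index')
    store.close()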