From 8bdf093e761c5849571802e0155f564a51506068 Mon Sep 17 00:00:00 2001
From: jreback
Date: Thu, 21 Nov 2013 09:17:55 -0500
Subject: [PATCH] TST/API/BUG: resolve scoping issues in pytables query where
 rhs is a compound selection or scoped variables, e.g. 'l1=selection.index'
 and 'l1=l' (where l = selection.index), were failing

---
 pandas/computation/expr.py       | 28 ++++++-----
 pandas/computation/pytables.py   | 18 +++++++-
 pandas/io/pytables.py            |  4 ++
 pandas/io/tests/test_pytables.py | 79 ++++++++++++++++++++++++++++++--
 4 files changed, 110 insertions(+), 19 deletions(-)

diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py
index 4c6ea3ecdae7d..bcb5570eae1fc 100644
--- a/pandas/computation/expr.py
+++ b/pandas/computation/expr.py
@@ -125,6 +125,10 @@ def __init__(self, gbls=None, lcls=None, level=1, resolvers=None,
         self.globals['True'] = True
         self.globals['False'] = False
 
+        # function defs
+        self.globals['list'] = list
+        self.globals['tuple'] = tuple
+
         res_keys = (list(o.keys()) for o in self.resolvers)
         self.resolver_keys = frozenset(reduce(operator.add, res_keys, []))
         self._global_resolvers = self.resolvers + (self.locals, self.globals)
@@ -505,21 +509,21 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
                                     maybe_eval_in_python=('==', '!=')):
         res = op(lhs, rhs)
 
-        if (res.op in _cmp_ops_syms and lhs.is_datetime or rhs.is_datetime and
-                self.engine != 'pytables'):
-            # all date ops must be done in python bc numexpr doesn't work well
-            # with NaT
-            return self._possibly_eval(res, self.binary_ops)
+        if self.engine != 'pytables':
+            if (res.op in _cmp_ops_syms and getattr(lhs,'is_datetime',False) or getattr(rhs,'is_datetime',False)):
+                # all date ops must be done in python bc numexpr doesn't work well
+                # with NaT
+                return self._possibly_eval(res, self.binary_ops)
 
         if res.op in eval_in_python:
             # "in"/"not in" ops are always evaluated in python
             return self._possibly_eval(res, eval_in_python)
-        elif (lhs.return_type == object or rhs.return_type == object and
-                self.engine != 'pytables'):
-            # evaluate "==" and "!=" in python if either of our operands has an
-            # object return type
-            return self._possibly_eval(res, eval_in_python +
-                                       maybe_eval_in_python)
+        elif self.engine != 'pytables':
+            if (getattr(lhs,'return_type',None) == object or getattr(rhs,'return_type',None) == object):
+                # evaluate "==" and "!=" in python if either of our operands has an
+                # object return type
+                return self._possibly_eval(res, eval_in_python +
+                                           maybe_eval_in_python)
         return res
 
     def visit_BinOp(self, node, **kwargs):
@@ -635,7 +639,7 @@ def visit_Attribute(self, node, **kwargs):
 
         raise ValueError("Invalid Attribute context {0}".format(ctx.__name__))
 
-    def visit_Call(self, node, **kwargs):
+    def visit_Call(self, node, side=None, **kwargs):
 
         # this can happen with: datetime.datetime
         if isinstance(node.func, ast.Attribute):
diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py
index a521bfb3cfec9..7716bc0051159 100644
--- a/pandas/computation/pytables.py
+++ b/pandas/computation/pytables.py
@@ -401,8 +401,15 @@ def visit_Assign(self, node, **kwargs):
         return self.visit(cmpr)
 
     def visit_Subscript(self, node, **kwargs):
+        # only allow simple subscripts
+
         value = self.visit(node.value)
         slobj = self.visit(node.slice)
+        try:
+            value = value.value
+        except:
+            pass
+
         try:
             return self.const_type(value[slobj], self.env)
         except TypeError:
@@ -416,9 +423,16 @@ def visit_Attribute(self, node, **kwargs):
         ctx = node.ctx.__class__
         if ctx == ast.Load:
             # resolve the value
-            resolved = self.visit(value).value
+            resolved = self.visit(value)
+
+            # try to get the value to see if we are another expression
+            try:
+                resolved = resolved.value
+            except (AttributeError):
+                pass
+
             try:
-                return getattr(resolved, attr)
+                return self.term_type(getattr(resolved, attr), self.env)
             except AttributeError:
 
                 # something like datetime.datetime where scope is overriden
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 49f60a7051ba9..d2fe1e0638192 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -294,6 +294,10 @@ def read_hdf(path_or_buf, key, **kwargs):
 
     """
 
+    # grab the scope
+    if 'where' in kwargs:
+        kwargs['where'] = _ensure_term(kwargs['where'])
+
     f = lambda store, auto_close: store.select(
         key, auto_close=auto_close, **kwargs)
 
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 759f63a962e57..ba69f7a834dad 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -81,14 +81,19 @@ def ensure_clean_store(path, mode='a', complevel=None, complib=None,
 def ensure_clean_path(path):
     """
     return essentially a named temporary file that is not opened
-    and deleted on existing
+    and deleted on exiting; if path is a list, then create and
+    return list of filenames
     """
-    try:
-        filename = create_tempfile(path)
-        yield filename
+    if isinstance(path, list):
+        filenames = [ create_tempfile(p) for p in path ]
+        yield filenames
+    else:
+        filenames = [ create_tempfile(path) ]
+        yield filenames[0]
     finally:
-        safe_remove(filename)
+        for f in filenames:
+            safe_remove(f)
 
 
 # set these parameters so we don't have file sharing
 tables.parameters.MAX_NUMEXPR_THREADS = 1
@@ -3124,6 +3129,70 @@ def test_frame_select_complex(self):
         expected = df.loc[df.index>df.index[3]].reindex(columns=['A','B'])
         tm.assert_frame_equal(result, expected)
 
+    def test_frame_select_complex2(self):
+
+        with ensure_clean_path(['parms.hdf','hist.hdf']) as paths:
+
+            pp, hh = paths
+
+            # use non-trivial selection criteria
+            parms = DataFrame({ 'A' : [1,1,2,2,3] })
+            parms.to_hdf(pp,'df',mode='w',format='table',data_columns=['A'])
+
+            selection = read_hdf(pp,'df',where='A=[2,3]')
+            hist = DataFrame(np.random.randn(25,1),columns=['data'],
+                             index=MultiIndex.from_tuples([ (i,j) for i in range(5) for j in range(5) ],
+                                                          names=['l1','l2']))
+
+            hist.to_hdf(hh,'df',mode='w',format='table')
+
+            expected = read_hdf(hh,'df',where=Term('l1','=',[2,3,4]))
+
+            # list like
+            result = read_hdf(hh,'df',where=Term('l1','=',selection.index.tolist()))
+            assert_frame_equal(result, expected)
+            l = selection.index.tolist()
+
+            # scope with list like
+            store = HDFStore(hh)
+            result = store.select('df',where='l1=l')
+            assert_frame_equal(result, expected)
+            store.close()
+
+            result = read_hdf(hh,'df',where='l1=l')
+            assert_frame_equal(result, expected)
+
+            # index
+            index = selection.index
+            result = read_hdf(hh,'df',where='l1=index')
+            assert_frame_equal(result, expected)
+
+            result = read_hdf(hh,'df',where='l1=selection.index')
+            assert_frame_equal(result, expected)
+
+            result = read_hdf(hh,'df',where='l1=selection.index.tolist()')
+            assert_frame_equal(result, expected)
+
+            result = read_hdf(hh,'df',where='l1=list(selection.index)')
+            assert_frame_equal(result, expected)
+
+            # scope with index
+            store = HDFStore(hh)
+
+            result = store.select('df',where='l1=index')
+            assert_frame_equal(result, expected)
+
+            result = store.select('df',where='l1=selection.index')
+            assert_frame_equal(result, expected)
+
+            result = store.select('df',where='l1=selection.index.tolist()')
+            assert_frame_equal(result, expected)
+
+            result = store.select('df',where='l1=list(selection.index)')
+            assert_frame_equal(result, expected)
+
+            store.close()
+
     def test_invalid_filtering(self):
 
         # can't use more than one filter (atm)
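
The new test above doubles as a usage recipe. The following is a minimal usage sketch of the behaviour this patch enables, not part of the patch itself: it assumes a pandas build containing this commit plus PyTables, and the file names 'parms.hdf' and 'hist.hdf' are placeholders written to the current directory.

    import numpy as np
    import pandas as pd

    # a small parameters table with a queryable data column
    parms = pd.DataFrame({'A': [1, 1, 2, 2, 3]})
    parms.to_hdf('parms.hdf', 'df', mode='w', format='table', data_columns=['A'])
    selection = pd.read_hdf('parms.hdf', 'df', where='A=[2,3]')

    # a larger table whose MultiIndex level 'l1' we want to query
    hist = pd.DataFrame(np.random.randn(25, 1), columns=['data'],
                        index=pd.MultiIndex.from_tuples(
                            [(i, j) for i in range(5) for j in range(5)],
                            names=['l1', 'l2']))
    hist.to_hdf('hist.hdf', 'df', mode='w', format='table')

    # a plain local variable on the rhs of the where clause is resolved in scope
    l = selection.index.tolist()
    res1 = pd.read_hdf('hist.hdf', 'df', where='l1=l')

    # compound selections: attribute access, a method call, a list() call
    res2 = pd.read_hdf('hist.hdf', 'df', where='l1=selection.index')
    res3 = pd.read_hdf('hist.hdf', 'df', where='l1=selection.index.tolist()')
    res4 = pd.read_hdf('hist.hdf', 'df', where='l1=list(selection.index)')

    # the same where strings work through HDFStore.select
    store = pd.HDFStore('hist.hdf')
    res5 = store.select('df', where='l1=selection.index')
    store.close()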