From 89a03bea1e3846e0af520d8760a6be7f2516bfa3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 21:34:56 -0400 Subject: [PATCH 01/48] ENH: add new computation module and toplevel eval function --- pandas/__init__.py | 1 + pandas/computation/__init__.py | 0 pandas/computation/api.py | 1 + pandas/computation/common.py | 11 + pandas/computation/engines.py | 290 ++++++++++ pandas/computation/eval.py | 75 +++ pandas/computation/expr.py | 135 +++++ pandas/{core => computation}/expressions.py | 72 +-- pandas/computation/ops.py | 188 +++++++ pandas/computation/tests/__init__.py | 0 pandas/computation/tests/test_eval.py | 552 +++++++++++++++++++ pandas/computation/tests/test_expressions.py | 157 ++++++ pandas/core/frame.py | 4 +- pandas/core/internals.py | 2 +- pandas/tests/test_expressions.py | 203 ------- setup.py | 3 +- vb_suite/binary_ops.py | 12 +- vb_suite/indexing.py | 4 +- 18 files changed, 1465 insertions(+), 245 deletions(-) create mode 100644 pandas/computation/__init__.py create mode 100644 pandas/computation/api.py create mode 100644 pandas/computation/common.py create mode 100644 pandas/computation/engines.py create mode 100644 pandas/computation/eval.py create mode 100644 pandas/computation/expr.py rename pandas/{core => computation}/expressions.py (75%) create mode 100644 pandas/computation/ops.py create mode 100644 pandas/computation/tests/__init__.py create mode 100644 pandas/computation/tests/test_eval.py create mode 100644 pandas/computation/tests/test_expressions.py delete mode 100644 pandas/tests/test_expressions.py diff --git a/pandas/__init__.py b/pandas/__init__.py index a0edb397c28c1..bec0877b13bb8 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -29,6 +29,7 @@ from pandas.stats.api import * from pandas.tseries.api import * from pandas.io.api import * +from pandas.computation.api import eval from pandas.util.testing import debug diff --git a/pandas/computation/__init__.py b/pandas/computation/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/computation/api.py b/pandas/computation/api.py new file mode 100644 index 0000000000000..86f72902a52c8 --- /dev/null +++ b/pandas/computation/api.py @@ -0,0 +1 @@ +from pandas.computation.eval import eval diff --git a/pandas/computation/common.py b/pandas/computation/common.py new file mode 100644 index 0000000000000..4061984dd5e08 --- /dev/null +++ b/pandas/computation/common.py @@ -0,0 +1,11 @@ +import collections +from pandas.core.common import is_string + + +def flatten(l): + for el in l: + if isinstance(el, collections.Iterable) and not is_string(el): + for s in flatten(el): + yield s + else: + yield el diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py new file mode 100644 index 0000000000000..0eb9875b85549 --- /dev/null +++ b/pandas/computation/engines.py @@ -0,0 +1,290 @@ +import abc +from functools import partial +from itertools import izip + +import numpy as np + +import pandas as pd +import pandas.core.common as com +from pandas.computation.ops import _resolve_name, _update_names +from pandas.computation.common import flatten + + +def _align_core_single_unary_op(term): + if isinstance(term, np.ndarray) and not com.is_series(term): + typ = np.asanyarray + else: + typ = type(term) + ret = typ, [term] + + if not hasattr(term, 'axes'): + ret += None, + else: + ret += _zip_axes_from_type(typ, term.axes), + return ret + + +def _zip_axes_from_type(typ, new_axes): + axes = {} + for ax_ind, ax_name in typ._AXIS_NAMES.iteritems(): + 
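        # map each positional axis back to its name (e.g. 0 -> 'index',
        # 1 -> 'columns' for a DataFrame) so the realigned axes can later be
        # fed back to the constructor as keyword arguments, i.e.
        # typ(obj, **axes) in _reconstruct_object below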
axes[ax_name] = new_axes[ax_ind] + return axes + + +def _maybe_promote_shape(values, naxes): + # test to see if we have an array else leave since must be a number + if not isinstance(values, np.ndarray): + return values + + ndims = values.ndim + if ndims > naxes: + raise AssertionError('cannot have more dims than axes, ' + '{0} > {1}'.format(ndims, naxes)) + if ndims == naxes: + return values + + ndim = set(xrange(ndims)) + nax = set(xrange(naxes)) + + axes_slice = [slice(None)] * naxes + + # symmetric difference + slices = nax - ndim + + if ndims == naxes: + if slices: + raise AssertionError('slices should be empty if ndims == naxes ' + '{0}'.format(slices)) + else: + if not slices: + raise AssertionError('slices should NOT be empty if ndim != naxes ' + '{0}'.format(slices)) + + for sl in slices: + axes_slice[sl] = np.newaxis + + return values[tuple(axes_slice)] + + +def _align_core(terms): + # need to ensure that terms is not an iterator + terms = list(terms) + + ## special cases + + # single unary operand + if len(terms) == 1: + return _align_core_single_unary_op(terms[0]) + # only scalars + elif all(np.isscalar(term) for term in terms): + return np.result_type(*terms), terms, None + + # single dim ndarrays + all_has_size = all(hasattr(term, 'size') for term in terms) + if (all_has_size and all(term.size == 1 for term in terms)): + return np.result_type(*terms), terms, None + + # made it past the special cases + term_index = [i for i, term in enumerate(terms) if hasattr(term, 'axes')] + term_dims = [terms[i].ndim for i in term_index] + ndims = pd.Series(dict(zip(term_index, term_dims))) + + # initial axes are the axes of the largest-axis'd term + biggest = terms[ndims.idxmax()] + typ = biggest._constructor + axes = biggest.axes + naxes = len(axes) + + for i in term_index: + for axis, items in enumerate(terms[i].axes): + if com.is_series(terms[i]) and naxes > 1: + axes[naxes - 1] = axes[naxes - 1].join(terms[i].index, + how='outer') + else: + axes[axis] = axes[axis].join(items, how='outer') + + for i, ndim in ndims.iteritems(): + for axis, items in izip(xrange(ndim), axes): + ti = terms[i] # needed here because we modify it in the inner loop + + if hasattr(ti, 'reindex_axis'): + transpose = com.is_series(ti) and naxes > 1 + + if transpose: + f = partial(ti.reindex, index=axes[naxes - 1], copy=False) + else: + f = partial(ti.reindex_axis, items, axis=axis, copy=False) + + if pd.lib.is_bool_array(ti.values): + r = f(fill_value=True) + else: + r = f() + + terms[i] = r + + res = _maybe_promote_shape(terms[i].T if transpose else terms[i], + naxes) + res = res.T if transpose else res + + try: + terms[i] = res.values + except AttributeError: + terms[i] = res + + return typ, terms, _zip_axes_from_type(typ, axes) + + +def _filter_terms(flat): + # numeric literals + literals = filter(lambda string: not com.is_string(string), flat) + literals_set = set(literals) + + # these are strings which are variable names + names = filter(com.is_string, flat) + names_set = set(names) + + # literals are not names and names are not literals, by definition + if literals_set & names_set: + raise AssertionError('literals cannot be names and names cannot be ' + 'literals') + return names, literals + + +def _align(terms, env): + # flatten the parse tree (a nested list) + flat = list(flatten(terms)) + + names, literals = _filter_terms(flat) + + # given an expression consisting of literals + if not names: + return np.result_type(*literals).type, None + + # get the variables out + resolve_in_env = partial(_resolve_name, 
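                             # bind the scope up front so each name resolves
                             # with a single call; a rough standalone sketch:
                             #   >>> from functools import partial
                             #   >>> resolve = partial(lambda env, k: env[k],
                             #   ...                   {'x': 1})
                             #   >>> resolve('x')
                             #   1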
env) + resolved = map(resolve_in_env, names) + + # if all resolved variables are numeric scalars + if all(map(np.isscalar, resolved)): + return np.result_type(*resolved).type, None + + # perform the main alignment + typ, resolved, axes = _align_core(resolved) + + # put them back in the symbol table + _update_names(env, dict(izip(names, resolved))) + + # we need this to reconstruct things after evaluation since we CANNOT + # depend on the array interface + return typ, axes + + +def _reconstruct_object(typ, obj, axes): + """Reconstruct an object given its type, raw value, and possibly empty + (None) axes. + + Parameters + ---------- + typ : object + A type + obj : object + The value to use in the type constructor + axes : dict + The axes to use to construct the resulting pandas object + + Returns + ------- + reconst : typ + An object of type ``typ`` with the value `obj` and possible axes + `axes`. + """ + try: + # handle numpy dtypes + typ = typ.type + except AttributeError: + pass + + if typ != np.asanyarray and issubclass(typ, pd.core.generic.PandasObject): + return typ(obj, **axes) + + ret_value = typ(obj) + + try: + return ret_value.item() + except (AttributeError, ValueError): + return ret_value + + +class AbstractEngine(object): + """""" + __metaclass__ = abc.ABCMeta + + has_neg_frac = False + + def __init__(self, expr): + self.expr = expr + self.aligned_axes = None + self.result_type = None + + @abc.abstractmethod + def convert(self): + """Convert an expression for evaluation.""" + pass + + def evaluate(self, env): + if not self._is_aligned: + self.result_type, self.aligned_axes = _align(self.expr.terms, env) + + res = self._evaluate(env) + return _reconstruct_object(self.result_type, res, self.aligned_axes) + + @property + def _is_aligned(self): + return self.aligned_axes is not None and self.result_type is not None + + @abc.abstractmethod + def _evaluate(self, env): + """Return an evaluated expression.""" + pass + + +class NumExprEngine(AbstractEngine): + """NumExpr engine class""" + has_neg_frac = True + + def __init__(self, expr): + super(NumExprEngine, self).__init__(expr) + + def convert(self): + """Return a string""" + return str(self.expr) + + def _evaluate(self, env): + import numexpr as ne + + try: + return ne.evaluate(self.convert(), local_dict=env.locals, + global_dict=env.globals, + truediv=self.expr.truediv) + except KeyError as e: + raise NameError('{0!r} is not defined'.format(e.message)) + + +class PythonEngine(AbstractEngine): + """Use NumPy even if numexpr is installed""" + has_neg_frac = False + + def __init__(self, expr): + super(PythonEngine, self).__init__(expr) + + def convert(self): + pass + + def evaluate(self, env): + return self.expr(env) + + def _evaluate(self, env): + pass + + +_engines = {'numexpr': NumExprEngine, 'python': PythonEngine} diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py new file mode 100644 index 0000000000000..21348f221bc99 --- /dev/null +++ b/pandas/computation/eval.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +import sys +import numbers +import collections +import itertools + +import numpy as np + +Scope = collections.namedtuple('Scope', 'globals locals') + +import pandas.core.common as com +from pandas.computation.expr import Expr +from pandas.computation.engines import _engines + + +def _scope_has_series_and_frame_datetime_index(env): + from pandas import DatetimeIndex + series_index = frame_index = 0 + + for v in itertools.chain(env.locals.itervalues(), + env.globals.itervalues()): + series_index += 
com.is_series(v) and isinstance(v.index, DatetimeIndex) + frame_index += com.is_frame(v) and isinstance(v.index, DatetimeIndex) + return series_index, frame_index + + +def _maybe_convert_engine(env, engine): + assert isinstance(env, Scope), 'environment must be an instance of Scope' + assert isinstance(engine, basestring), 'engine name must be a string' + + ret = engine + + if all(_scope_has_series_and_frame_datetime_index(env)): + ret = 'python' + return ret + + +def eval(expr, engine='numexpr', truediv=True, local_dict=None, + global_dict=None): + # make sure we're passed a valid engine + if not engine in _engines: + raise KeyError('Invalid engine {0} passed, valid engines are' + ' {1}'.format(_engines.keys())) + + # 1 up in the call stack for locals/globals; see the documentation for the + # inspect module for why you must decrease the refcount of frame + frame = sys._getframe(1) + + try: + # get the globals and locals + gbl, lcl = global_dict or frame.f_globals, local_dict or frame.f_locals + + # shallow copy the scope so we don't overwrite everything + env = Scope(gbl.copy(), lcl.copy()) + + engine = _maybe_convert_engine(env, engine) + + # parse the expression + parsed_expr = Expr(expr, engine, truediv) + + # choose the engine + eng = _engines[engine] + + # construct the engine and evaluate + ret = eng(parsed_expr).evaluate(env) + finally: + del frame + + # sanity check for a number + if np.isscalar(ret): + if not isinstance(ret, (np.number, numbers.Number, np.bool_, bool)): + raise TypeError('scalar result must be numeric or bool, type is ' + '{0!r}'.format(ret.__class__.__name__)) + return ret diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py new file mode 100644 index 0000000000000..105c0a020a2ad --- /dev/null +++ b/pandas/computation/expr.py @@ -0,0 +1,135 @@ +import ast +from functools import partial + +from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops +from pandas.computation.ops import _cmp_ops_syms, _bool_ops_syms +from pandas.computation.ops import _arith_ops_syms, _unary_ops_syms + + +class ExprParserError(Exception): + pass + + +class ExprVisitor(ast.NodeVisitor): + """Custom ast walker + """ + bin_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms + bin_op_nodes = ('Gt', 'Lt', 'GtE', 'LtE', 'Eq', 'NotEq', 'BitAnd', 'BitOr', + 'Add', 'Sub', 'Mult', 'Div', 'Pow', 'FloorDiv') + bin_op_nodes_map = dict(zip(bin_ops, bin_op_nodes)) + + unary_ops = _unary_ops_syms + unary_op_nodes = 'UAdd', 'USub', 'Invert' + unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes)) + + def __init__(self): + for bin_op in self.bin_ops: + setattr(self, 'visit_{0}'.format(self.bin_op_nodes_map[bin_op]), + lambda node, bin_op=bin_op: partial(BinOp, bin_op)) + + for unary_op in self.unary_ops: + setattr(self, + 'visit_{0}'.format(self.unary_op_nodes_map[unary_op]), + lambda node, unary_op=unary_op: partial(UnaryOp, unary_op)) + + def visit(self, node): + if not (isinstance(node, ast.AST) or isinstance(node, basestring)): + raise AssertionError('"node" must be an AST node or a string, you' + ' passed a(n) {0}'.format(node.__class__)) + if isinstance(node, basestring): + node = ast.fix_missing_locations(ast.parse(node)) + return super(ExprVisitor, self).visit(node) + + def visit_Module(self, node): + if len(node.body) != 1: + raise ExprParserError('only a single expression is allowed') + + expr = node.body[0] + if not isinstance(expr, ast.Expr): + raise SyntaxError('only expressions are allowed') + + return self.visit(expr) + + def visit_Expr(self, node): 
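    # ast.NodeVisitor dispatches on the node's class name, so an Expr node is
    # simply unwrapped into its value. A minimal sketch of the same pattern,
    # independent of pandas:
    #   >>> import ast
    #   >>> class V(ast.NodeVisitor):
    #   ...     def visit_Num(self, node):
    #   ...         return node.n
    #   >>> V().visit(ast.parse('42').body[0].value)
    #   42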
+ return self.visit(node.value) + + def visit_BinOp(self, node): + op = self.visit(node.op) + left = self.visit(node.left) + right = self.visit(node.right) + return op(left, right) + + def visit_UnaryOp(self, node): + op = self.visit(node.op) + return op(self.visit(node.operand)) + + def visit_Name(self, node): + return node.id + + def visit_Num(self, node): + return node.n + + def visit_Compare(self, node): + ops = node.ops + comps = node.comparators + if len(ops) != 1: + raise ExprParserError('chained comparisons not supported') + return self.visit(ops[0])(self.visit(node.left), self.visit(comps[0])) + + def visit_Call(self, node): + if not isinstance(node.func, ast.Name): + raise TypeError("Only named functions are supported") + + valid_ops = _reductions + _mathops + + if node.func.id not in valid_ops: + raise ValueError("Only {0} are supported".format(valid_ops)) + + raise NotImplementedError("function calls not yet supported") + + def visit_Attribute(self, node): + raise NotImplementedError("attribute access is not yet supported") + + def visit_Mod(self, node): + raise NotImplementedError("modulo operator not yet supported") + + +class Expr(object): + """Expr object for pandas + """ + def __init__(self, expr, engine, truediv): + self.expr = expr + self._visitor = ExprVisitor() + self.terms = self.parse() + self.engine = engine + self.truediv = truediv + + def __call__(self, env): + env.locals['truediv'] = self.truediv + return self.terms(env) + + def __repr__(self): + return '{0} -> {1}'.format(self.expr, self.terms) + + def __str__(self): + return self.expr + + def parse(self): + """return a Termset""" + try: + visited = self._visitor.visit(self.expr) + except SyntaxError as e: + raise e + return visited + + def align(self, env): + """align a set of Terms""" + return self.terms.align(env) + + +def isexpr(s): + try: + Expr(s, engine=None) + except SyntaxError: + return False + return True diff --git a/pandas/core/expressions.py b/pandas/computation/expressions.py similarity index 75% rename from pandas/core/expressions.py rename to pandas/computation/expressions.py index abe891b82410c..e1551f9b0548e 100644 --- a/pandas/core/expressions.py +++ b/pandas/computation/expressions.py @@ -5,6 +5,7 @@ Offer fast expression evaluation thru numexpr """ + import numpy as np try: @@ -14,17 +15,19 @@ _NUMEXPR_INSTALLED = False _USE_NUMEXPR = _NUMEXPR_INSTALLED -_evaluate = None -_where = None +_evaluate = None +_where = None # the set of dtypes that we will allow pass to numexpr -_ALLOWED_DTYPES = dict(evaluate = set(['int64','int32','float64','float32','bool']), - where = set(['int64','float64','bool'])) +_ALLOWED_DTYPES = dict( + evaluate=set(['int64', 'int32', 'float64', 'float32', 'bool']), + where=set(['int64', 'float64', 'bool'])) # the minimum prod shape that we will use numexpr -_MIN_ELEMENTS = 10000 +_MIN_ELEMENTS = 10000 + -def set_use_numexpr(v = True): +def set_use_numexpr(v=True): # set/unset to use numexpr global _USE_NUMEXPR if _NUMEXPR_INSTALLED: @@ -34,12 +37,13 @@ def set_use_numexpr(v = True): global _evaluate, _where if not _USE_NUMEXPR: _evaluate = _evaluate_standard - _where = _where_standard + _where = _where_standard else: _evaluate = _evaluate_numexpr - _where = _where_numexpr + _where = _where_numexpr + -def set_numexpr_threads(n = None): +def set_numexpr_threads(n=None): # if we are using numexpr, set the threads to n # otherwise reset try: @@ -53,24 +57,25 @@ def set_numexpr_threads(n = None): def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): 
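    # plain-Python fallback used when numexpr is disabled, unavailable, or
    # not worthwhile for the operands; with op=operator.add this is just
    # a + b, and op_str/eval_kwargs are only meaningful on the numexpr path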
""" standard evaluation """ - return op(a,b) + return op(a, b) + def _can_use_numexpr(op, op_str, a, b, dtype_check): """ return a boolean if we WILL be using numexpr """ if op_str is not None: - + # required min elements (otherwise we are adding overhead) if np.prod(a.shape) > _MIN_ELEMENTS: # check for dtype compatiblity dtypes = set() - for o in [ a, b ]: - if hasattr(o,'get_dtype_counts'): + for o in [a, b]: + if hasattr(o, 'get_dtype_counts'): s = o.get_dtype_counts() if len(s) > 1: return False dtypes |= set(s.index) - elif isinstance(o,np.ndarray): + elif isinstance(o, np.ndarray): dtypes |= set([o.dtype.name]) # allowed are a superset @@ -85,9 +90,9 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): if _can_use_numexpr(op, op_str, a, b, 'evaluate'): try: a_value, b_value = a, b - if hasattr(a_value,'values'): + if hasattr(a_value, 'values'): a_value = a_value.values - if hasattr(b_value,'values'): + if hasattr(b_value, 'values'): b_value = b_value.values result = ne.evaluate('a_value %s b_value' % op_str, local_dict={ 'a_value' : a_value, @@ -98,33 +103,35 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): pass except (Exception), detail: if raise_on_error: - raise TypeError(str(detail)) + raise if result is None: - result = _evaluate_standard(op,op_str,a,b,raise_on_error) + result = _evaluate_standard(op, op_str, a, b, raise_on_error) return result -def _where_standard(cond, a, b, raise_on_error=True): + +def _where_standard(cond, a, b, raise_on_error=True): return np.where(cond, a, b) -def _where_numexpr(cond, a, b, raise_on_error = False): + +def _where_numexpr(cond, a, b, raise_on_error=False): result = None if _can_use_numexpr(None, 'where', a, b, 'where'): try: cond_value, a_value, b_value = cond, a, b - if hasattr(cond_value,'values'): + if hasattr(cond_value, 'values'): cond_value = cond_value.values - if hasattr(a_value,'values'): + if hasattr(a_value, 'values'): a_value = a_value.values - if hasattr(b_value,'values'): + if hasattr(b_value, 'values'): b_value = b_value.values result = ne.evaluate('where(cond_value,a_value,b_value)', - local_dict={ 'cond_value' : cond_value, - 'a_value' : a_value, - 'b_value' : b_value }, + local_dict={'cond_value': cond_value, + 'a_value': a_value, + 'b_value': b_value}, casting='safe') except (ValueError), detail: if 'unknown type object' in str(detail): @@ -134,7 +141,7 @@ def _where_numexpr(cond, a, b, raise_on_error = False): raise TypeError(str(detail)) if result is None: - result = _where_standard(cond,a,b,raise_on_error) + result = _where_standard(cond, a, b, raise_on_error) return result @@ -152,8 +159,9 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kw op_str: the string version of the op a : left operand b : right operand - raise_on_error : pass the error to the higher level if indicated (default is False), - otherwise evaluate the op with and return the results + raise_on_error : pass the error to the higher level if indicated + (default is False), otherwise evaluate the op with and + return the results use_numexpr : whether to try to use numexpr (default True) """ @@ -161,6 +169,7 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kw return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs) return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error) + def where(cond, a, b, raise_on_error=False, use_numexpr=True): """ evaluate the where condition cond on a and b @@ 
-170,8 +179,9 @@ def where(cond, a, b, raise_on_error=False, use_numexpr=True): cond : a boolean array a : return if cond is True b : return if cond is False - raise_on_error : pass the error to the higher level if indicated (default is False), - otherwise evaluate the op with and return the results + raise_on_error : pass the error to the higher level if indicated + (default is False), otherwise evaluate the op with and + return the results use_numexpr : whether to try to use numexpr (default True) """ diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py new file mode 100644 index 0000000000000..fb1965f45c52b --- /dev/null +++ b/pandas/computation/ops.py @@ -0,0 +1,188 @@ +import operator as op +from functools import partial + +from pandas.util.py3compat import PY3 + + +_reductions = 'sum', 'prod' +_mathops = 'sin', 'cos', 'tan' + + +class OperatorError(Exception): + pass + + +class UnaryOperatorError(OperatorError): + pass + + +class BinaryOperatorError(OperatorError): + pass + + +def _resolve_name(env, key): + res = env.locals.get(key, env.globals.get(key)) + + if res is None: + if not isinstance(key, basestring): + return key + + raise NameError('{0!r} is undefined'.format(key)) + + return res + + +def _update_name(env, key, value): + if isinstance(key, basestring): + try: + del env.locals[key] + env.locals[key] = value + except KeyError: + try: + del env.globals[key] + env.globals[key] = value + except KeyError: + raise NameError('{0!r} is undefined'.format(key)) + + +def _update_names(env, mapping): + updater = partial(_update_name, env) + for key, value in mapping.iteritems(): + updater(key, value) + + +class Op(object): + """Hold an operator of unknown arity + """ + def __init__(self, op, operands): + self.op = op + self.operands = operands + + def __iter__(self): + return iter(self.operands) + + @property + def name(self): + return self.__class__.__name__ + + +_cmp_ops_syms = '>', '<', '>=', '<=', '==', '!=' +_cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne +_cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs)) + +_bool_ops_syms = '&', '|' +_bool_ops_funcs = op.and_, op.or_ +_bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs)) + +_arith_ops_syms = '+', '-', '*', '/', '**', '//' +_arith_ops_funcs = (op.add, op.sub, op.mul, op.truediv if PY3 else op.div, + op.pow, op.floordiv) +_arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs)) + +_binary_ops_dict = {} + +for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict): + _binary_ops_dict.update(d) + + +class BinOp(Op): + """Hold a binary operator and its operands + + Parameters + ---------- + op : str or Op + left : str or Op + right : str or Op + """ + def __init__(self, op, lhs, rhs): + super(BinOp, self).__init__(op, (lhs, rhs)) + self.lhs = lhs + self.rhs = rhs + + try: + self.func = _binary_ops_dict[op] + except KeyError: + keys = _binary_ops_dict.keys() + raise BinaryOperatorError('Invalid binary operator {0}, valid' + ' operators are {1}'.format(op, keys)) + + def __repr__(self): + return '{0}(op={1!r}, lhs={2!r}, rhs={3!r})'.format(self.name, self.op, + self.lhs, self.rhs) + + __str__ = __repr__ + + def __call__(self, env): + # handle truediv + if self.op == '/' and env.locals['truediv']: + self.func = op.truediv + + # recurse over the left nodes + try: + left = self.lhs(env) + except TypeError: + left = self.lhs + + # recursve over the right nodes + try: + right = self.rhs(env) + except TypeError: + right = self.rhs + + # base cases + if not (isinstance(left, basestring) or 
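            # a string operand here is an unresolved variable name (numeric
            # literals were already substituted), so each string branch below
            # looks the name up in the scope before applying self.func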
isinstance(right, basestring)): + res = self.func(left, right) + elif isinstance(left, basestring) and not isinstance(right, + basestring): + res = self.func(_resolve_name(env, left), right) + elif not isinstance(left, basestring) and isinstance(right, + basestring): + res = self.func(left, _resolve_name(env, right)) + elif isinstance(left, basestring) and isinstance(right, basestring): + res = self.func(_resolve_name(env, left), _resolve_name(env, + right)) + + return res + + +_unary_ops_syms = '+', '-', '~' +_unary_ops_funcs = op.pos, op.neg, op.invert +_unary_ops_dict = dict(zip(_unary_ops_syms, _unary_ops_funcs)) + + +class UnaryOp(Op): + """Hold a unary operator and its operands + """ + def __init__(self, op, operand): + super(UnaryOp, self).__init__(op, (operand,)) + self.operand = operand + + try: + self.func = _unary_ops_dict[op] + except KeyError: + raise UnaryOperatorError('Invalid unary operator {0}, valid ' + 'operators are ' + '{1}'.format(op, _unary_ops_syms)) + + def __call__(self, env): + operand = self.operand + try: + operand = self.operand(env) + except TypeError: + operand = self.operand + + if isinstance(operand, basestring): + v = _resolve_name(env, operand) + else: + v = operand + + try: + res = self.func(v) + except TypeError: + res = self.func(v.values) + + return res + + def __repr__(self): + return '{0}(op={1!r}, operand={2!r})'.format(self.name, self.op, + self.operand) diff --git a/pandas/computation/tests/__init__.py b/pandas/computation/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py new file mode 100644 index 0000000000000..2d7bf4392cfea --- /dev/null +++ b/pandas/computation/tests/test_eval.py @@ -0,0 +1,552 @@ +#!/usr/bin/env python + +import itertools +from itertools import product + +import nose +from nose.tools import assert_raises, assert_tuple_equal, assert_equal +from nose.tools import assert_true + +from numpy.random import randn +import numpy as np +from numpy.testing import assert_array_equal +from numpy.testing.decorators import slow + +import pandas as pd +from pandas import DataFrame, Series +from pandas.util.testing import makeCustomDataframe as mkdf +from pandas.computation.engines import (_engines, _align_core, + _reconstruct_object) +from pandas.computation.ops import _binary_ops_dict, _unary_ops_dict +import pandas.computation.expr as expr +from pandas.computation.expressions import _USE_NUMEXPR +from pandas.computation.eval import Scope +from pandas.computation.eval import _scope_has_series_and_frame_datetime_index +from pandas.computation.eval import _maybe_convert_engine +from pandas.util.testing import assert_frame_equal, randbool + + +def skip_numexpr_engine(engine): + if not _USE_NUMEXPR and engine == 'numexpr': + raise nose.SkipTest + + +def engine_has_neg_frac(engine): + return _engines[engine].has_neg_frac + + +def fractional(x): + frac, _ = np.modf(np.asanyarray(x)) + return frac + + +def hasfractional(x): + return np.any(fractional(x) != 0.0) + + +def _eval_from_expr(lhs, cmp1, rhs, binop, cmp2): + f1 = _binary_ops_dict[cmp1] + f2 = _binary_ops_dict[cmp2] + bf = _binary_ops_dict[binop] + typ, (lhs, rhs), axes = _align_core((lhs, rhs)) + return _reconstruct_object(typ, bf(f1(lhs, rhs), f2(lhs, rhs)), axes) + + +def _eval_single_bin(lhs, cmp1, rhs, has_neg_frac): + c = _binary_ops_dict[cmp1] + if has_neg_frac: + try: + result = c(lhs, rhs) + except ValueError: + result = np.nan + else: + result = c(lhs, rhs) + 
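    # engines flagged ``has_neg_frac`` swallow the error from a negative base
    # with a fractional exponent and produce NaN, so the expected value above
    # mirrors that by catching ValueError instead of propagating it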
return result + + +def isframe(x): + return isinstance(x, pd.DataFrame) + + +def isseries(x): + return isinstance(x, pd.Series) + + +def are_compatible_types(op, lhs, rhs): + if op in ('&', '|'): + if isframe(lhs) and isseries(rhs) or isframe(rhs) and isseries(lhs): + return False + return True + + +def _eval_bin_and_unary(unary, lhs, arith1, rhs): + binop = _binary_ops_dict[arith1] + unop = expr._unary_ops_dict[unary] + return unop(binop(lhs, rhs)) + + +# Smoke testing +class TestBasicEval(object): + + @classmethod + def setUpClass(self): + self.cmp_ops = expr._cmp_ops_syms + self.cmp2_ops = self.cmp_ops[::-1] + self.bin_ops = expr._bool_ops_syms + self.arith_ops = tuple(o for o in expr._arith_ops_syms if o != '//') + self.unary_ops = '+', '-' + + def set_current_engine(self): + self.engine = 'numexpr' + + def setup_data(self): + self.lhses = (DataFrame(randn(10, 5)), Series(randn(5)), randn(), + np.float64(randn())) + self.rhses = (DataFrame(randn(10, 5)), Series(randn(5)), randn(), + np.float64(randn())) + + def setUp(self): + try: + import numexpr as ne + self.ne = ne + except ImportError: + raise nose.SkipTest + self.set_current_engine() + self.setup_data() + self.current_engines = filter(lambda x: x != self.engine, + _engines.iterkeys()) + + @slow + def test_complex_cmp_ops(self): + self.setUp() + lhses, rhses = self.lhses, self.rhses + args = itertools.product(lhses, self.cmp_ops, rhses, self.bin_ops, + self.cmp2_ops) + for lhs, cmp1, rhs, binop, cmp2 in args: + self._create_cmp_op_t(lhs, cmp1, rhs, binop, cmp2) + + def test_simple_cmp_ops(self): + bool_lhses = (DataFrame(randbool(size=(10, 5))), + Series(randbool((5,))), randbool()) + bool_rhses = (DataFrame(randbool(size=(10, 5))), + Series(randbool((5,))), randbool()) + args = itertools.product(bool_lhses, bool_rhses, self.cmp_ops) + for lhs, rhs, cmp_op in args: + self._create_simple_cmp_op_t(lhs, rhs, cmp_op) + + def test_binary_arith_ops(self): + self.setUp() + lhses = DataFrame(randn(10, 5)), Series(randn(5)), randn() + rhses = DataFrame(randn(10, 5)), Series(randn(5)), randn() + args = itertools.product(lhses, self.arith_ops, rhses) + for lhs, op, rhs in args: + self._create_arith_op_t(lhs, op, rhs) + + def test_unary_arith_ops(self): + self.setUp() + lhses = DataFrame(randn(10, 5)), Series(randn(5)), randn() + rhses = DataFrame(randn(10, 5)), Series(randn(5)), randn() + aops = tuple(aop for aop in self.arith_ops if aop not in '+-') + args = itertools.product(self.unary_ops, lhses, aops, rhses) + for unary_op, lhs, arith_op, rhs in args: + self._create_unary_arith_op_t(unary_op, lhs, arith_op, rhs) + + def test_invert(self): + self.setUp() + lhses = DataFrame(randn(10, 5)), Series(randn(5)), randn() + rhses = DataFrame(randn(10, 5)), Series(randn(5)), randn() + args = itertools.product(lhses, self.cmp_ops, rhses) + for lhs, op, rhs in args: + self._create_invert_op_t(lhs, op, rhs) + + def _create_cmp_op_t(self, lhs, cmp1, rhs, binop, cmp2): + ex = '(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)'.format(cmp1=cmp1, + binop=binop, + cmp2=cmp2) + expected = _eval_from_expr(lhs, cmp1, rhs, binop, cmp2) + result = pd.eval(ex, engine=self.engine) + assert_array_equal(result, expected) + + def _create_simple_cmp_op_t(self, lhs, rhs, cmp1): + ex = 'lhs {0} rhs'.format(cmp1) + + if are_compatible_types(cmp1, lhs, rhs): + expected = _eval_single_bin(lhs, cmp1, rhs, + engine_has_neg_frac(self.engine)) + result = pd.eval(ex, engine=self.engine) + assert_array_equal(result, expected) + else: + assert_raises(TypeError, _eval_single_bin, lhs, 
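                          # '&'/'|' between a frame and a series is the
                          # incompatible pairing detected above, so the raw
                          # binary op itself is expected to raise TypeError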
cmp1, rhs, + engine_has_neg_frac(self.engine)) + + def _create_arith_op_t(self, lhs, arith1, rhs): + ex = 'lhs {0} rhs'.format(arith1) + nan_frac_neg = (arith1 == '**' and np.any(lhs < 0) and + hasfractional(rhs) and np.isscalar(lhs) and + np.isscalar(rhs) and + not (isinstance(lhs, tuple(np.typeDict.values())) + or isinstance(rhs, tuple(np.typeDict.values())))) + if nan_frac_neg and not engine_has_neg_frac(self.engine): + assert_raises(ValueError, pd.eval, ex, engine=self.engine, + local_dict=locals(), global_dict=globals()) + else: + result = pd.eval(ex, engine=self.engine) + + if arith1 != '//': + expected = _eval_single_bin(lhs, arith1, rhs, + engine_has_neg_frac(self.engine)) + assert_array_equal(result, expected) + + # sanity check on recursive parsing + try: + ghs = rhs.copy() + except AttributeError: + ghs = rhs + + if nan_frac_neg and not engine_has_neg_frac(self.engine): + assert_raises(ValueError, pd.eval, ex, engine=self.engine, + local_dict=locals(), global_dict=globals()) + else: + if arith1 == '**': + ex = '(lhs {0} rhs) {0} ghs'.format(arith1) + else: + ex = 'lhs {0} rhs {0} ghs'.format(arith1) + result = pd.eval(ex, engine=self.engine) + + try: + nlhs = _eval_single_bin(lhs, arith1, rhs, + engine_has_neg_frac(self.engine)) + except ValueError: + assert_raises(ValueError, _eval_single_bin, lhs, arith1, rhs, + engine_has_neg_frac(self.engine)) + else: + try: + nlhs, ghs = nlhs.align(ghs) + except: + pass + if arith1 != '//': + expected = self.ne.evaluate('nlhs {0} ghs'.format(arith1)) + assert_array_equal(result, expected) + + def _create_invert_op_t(self, lhs, cmp1, rhs): + # simple + for el in (lhs, rhs): + try: + elb = el.astype(bool) + except AttributeError: + elb = np.array([bool(el)]) + expected = ~elb + result = pd.eval('~elb', engine=self.engine) + assert_array_equal(expected, result) + + for engine in self.current_engines: + assert_array_equal(result, pd.eval('~elb', engine=engine)) + + # compound + ex = '~(lhs {0} rhs)'.format(cmp1) + if np.isscalar(lhs) and np.isscalar(rhs): + lhs, rhs = map(lambda x: np.array([x]), (lhs, rhs)) + expected = ~_eval_single_bin(lhs, cmp1, rhs, + engine_has_neg_frac(self.engine)) + result = pd.eval(ex, engine=self.engine) + assert_array_equal(expected, result) + + # make sure the other engines work + for engine in self.current_engines: + ev = pd.eval(ex, engine=self.engine) + assert_array_equal(ev, result) + + def _create_unary_arith_op_t(self, unary_op, lhs, arith1, rhs): + # simple + ex = '{0}lhs'.format(unary_op, arith1) + f = _unary_ops_dict[unary_op] + bad_types = tuple(np.typeDict.values()) + + nan_frac_neg = (arith1 == '**' and + np.any(lhs < 0) and + hasfractional(rhs) and + np.isscalar(lhs) and np.isscalar(rhs) and + not (isinstance(lhs, bad_types) or + isinstance(rhs, bad_types)) + and not engine_has_neg_frac(self.engine)) + try: + expected = f(lhs.values) + except AttributeError: + expected = f(lhs) + result = pd.eval(ex, engine=self.engine) + assert_array_equal(result, expected) + + for engine in self.current_engines: + assert_array_equal(result, pd.eval(ex, engine=engine)) + + ex = '{0}(lhs {1} rhs)'.format(unary_op, arith1) + + if nan_frac_neg: + assert_raises(ValueError, pd.eval, ex, engine=self.engine, + local_dict=locals(), global_dict=globals()) + else: + # compound + result = pd.eval(ex, engine=self.engine) + + #(lhs, rhs), _ = _align((lhs, rhs)) + #if arith1 != '//': + #expected = self.ne.evaluate(ex) + #assert_array_equal(result, expected) + #else: + #assert_raises(TypeError, self.ne.evaluate, ex) + + #for engine 
in self.current_engines: + #if arith1 != '//': + #if engine_has_neg_frac(engine): + #assert_array_equal(result, pd.eval(ex, engine=engine)) + #else: + #assert_raises(TypeError, pd.eval, ex, engine=engine, + #local_dict=locals(), global_dict=globals()) + + +class TestBasicEvalPython(TestBasicEval): + + @classmethod + def setUpClass(cls): + cls.cmp_ops = expr._cmp_ops_syms + cls.cmp2_ops = cls.cmp_ops[::-1] + cls.bin_ops = expr._bool_ops_syms + cls.arith_ops = expr._arith_ops_syms + cls.unary_ops = '+', '-' + + def set_current_engine(self): + self.engine = 'python' + + +def test_syntax_error_exprs(): + for engine in _engines: + e = 's +' + assert_raises(SyntaxError, pd.eval, e, engine=engine) + + +def test_name_error_exprs(): + for engine in _engines: + e = 's + t' + assert_raises(NameError, pd.eval, e, engine=engine) + + +def test_align_nested_unary_op(): + for engine in _engines: + yield check_align_nested_unary_op, engine + + +f = lambda *args, **kwargs: np.random.randn() + + +def check_align_nested_unary_op(engine): + skip_numexpr_engine(engine) + s = 'df * ~2' + df = mkdf(10, 10, data_gen_f=f) + res = pd.eval(s, engine) + assert_frame_equal(res, df * ~2) + + +def check_basic_frame_alignment(engine): + df = mkdf(10, 10, data_gen_f=f) + df2 = mkdf(20, 10, data_gen_f=f) + res = pd.eval('df + df2', engine=engine) + assert_frame_equal(res, df + df2) + + +def test_basic_frame_alignment(): + for engine in _engines: + yield check_basic_frame_alignment, engine + + +def check_medium_complex_frame_alignment(engine, r1, r2, c1, c2): + skip_numexpr_engine(engine) + df = mkdf(5, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1) + df2 = mkdf(10, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2) + df3 = mkdf(15, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2) + res = pd.eval('df + df2 + df3', engine=engine) + assert_frame_equal(res, df + df2 + df3) + + +@slow +def test_medium_complex_frame_alignment(): + args = product(_engines, *([INDEX_TYPES[:4]] * 4)) + for engine, r1, r2, c1, c2 in args: + check_medium_complex_frame_alignment(engine, r1, r2, c1, c2) + + +def check_basic_frame_series_alignment(engine, r_idx_type, c_idx_type, + index_name): + skip_numexpr_engine(engine) + df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, + c_idx_type=c_idx_type) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + if r_idx_type != 'p' and c_idx_type == 'p' and index_name == 'index': + assert_raises(ValueError, pd.eval, 'df + s', local_dict=locals()) + assert_raises(ValueError, df.add, s, axis=1) + else: + res = pd.eval('df + s', engine=engine) + expected = df + s + assert_frame_equal(res, expected) + + +def check_not_both_period_fails_otherwise_succeeds(lhs, rhs, r_idx_type, + c_idx_type, index_name, s, + df, *terms): + if r_idx_type != 'p' and c_idx_type == 'p' and index_name == 'index': + assert_raises(ValueError, pd.eval, lhs, local_dict=locals()) + assert_raises(ValueError, pd.eval, rhs, local_dict=locals()) + else: + a, b = pd.eval(lhs), pd.eval(rhs) + assert_frame_equal(a, b) + + +def check_basic_series_frame_alignment(engine, r_idx_type, c_idx_type, + index_name): + skip_numexpr_engine(engine) + df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, + c_idx_type=c_idx_type) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + if r_idx_type != 'p' and c_idx_type == 'p' and index_name == 'index': + assert_raises(ValueError, pd.eval, 's + df', local_dict=locals()) + assert_raises(ValueError, df.add, s, axis=1) + else: + res = pd.eval('s + df', engine=engine) 
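        # pd.eval should reproduce the semantics of the plain expression:
        # the Series aligns against the frame exactly as 's + df' does when
        # evaluated by Python, so compare against that result directly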
+ expected = s + df + assert_frame_equal(res, expected) + + +@slow +def check_basic_series_frame_alignment_datetime(engine, r_idx_type, c_idx_type, + index_name): + skip_numexpr_engine(engine) + df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, + c_idx_type=c_idx_type) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + if r_idx_type != 'p' and c_idx_type == 'p' and index_name == 'index': + assert_raises(ValueError, pd.eval, 's + df', local_dict=locals()) + assert_raises(ValueError, df.add, s, axis=1) + else: + res = pd.eval('s + df', engine=engine) + expected = s + df + assert_frame_equal(res, expected) + + if r_idx_type != 'p' and c_idx_type == 'p' and index_name == 'index': + assert_raises(ValueError, pd.eval, 'df + s', local_dict=locals()) + assert_raises(ValueError, df.add, s, axis=1) + else: + res = pd.eval('df + s', engine=engine) + expected = df + s + assert_frame_equal(res, expected) + + +def check_series_frame_commutativity(engine, r_idx_type, c_idx_type, op, + index_name): + skip_numexpr_engine(engine) + df = mkdf(10, 10, data_gen_f=f, r_idx_type=r_idx_type, + c_idx_type=c_idx_type) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + lhs = 's {0} df'.format(op) + rhs = 'df {0} s'.format(op) + check_not_both_period_fails_otherwise_succeeds(lhs, rhs, r_idx_type, + c_idx_type, index_name, s, + df) + + +INDEX_TYPES = 'i', 'f', 's', 'u', 'dt', # 'p' + + +@slow +def test_series_frame_commutativity(): + args = product(_engines, INDEX_TYPES, INDEX_TYPES, ('+', '*'), ('index', + 'columns')) + for engine, r_idx_type, c_idx_type, op, index_name in args: + check_series_frame_commutativity(engine, r_idx_type, c_idx_type, op, + index_name) + + +def test_basic_frame_series_alignment(): + args = product(_engines, INDEX_TYPES, INDEX_TYPES, ('index', 'columns')) + for engine, r_idx_type, c_idx_type, index_name in args: + check_basic_frame_series_alignment(engine, r_idx_type, c_idx_type, + index_name) + + +@slow +def test_basic_series_frame_alignment_datetime(): + idx_types = INDEX_TYPES + args = product(_engines, idx_types, idx_types, ('index', 'columns')) + for engine, r_idx_type, c_idx_type, index_name in args: + check_basic_series_frame_alignment_datetime(engine, r_idx_type, + c_idx_type, index_name) + + +def test_basic_series_frame_alignment(): + args = product(_engines, INDEX_TYPES, INDEX_TYPES, ('index', 'columns')) + for engine, r_idx_type, c_idx_type, index_name in args: + check_basic_series_frame_alignment(engine, r_idx_type, c_idx_type, + index_name) + + +def check_complex_series_frame_alignment(engine, index_name, obj, r1, r2, c1, + c2): + skip_numexpr_engine(engine) + df = mkdf(10, 10, data_gen_f=f, r_idx_type=r1, c_idx_type=c1) + df2 = mkdf(20, 10, data_gen_f=f, r_idx_type=r2, c_idx_type=c2) + index = getattr(locals()[obj], index_name) + s = Series(np.random.randn(5), index[:5]) + if engine != 'python': + expected = df2.add(s, axis=1).add(df) + else: + expected = df2 + s + df + res = pd.eval('df2 + s + df', engine=engine) + expected = df2 + s + df + assert_tuple_equal(res.shape, expected.shape) + assert_frame_equal(res, expected) + + +@slow +def test_complex_series_frame_alignment(): + args = product(_engines, ('index', 'columns'), ('df', 'df2'), + *([INDEX_TYPES[:4]] * 4)) + for engine, index_name, obj, r1, r2, c1, c2 in args: + check_complex_series_frame_alignment(engine, index_name, obj, r1, r2, + c1, c2) + + +def check_datetime_index_rows_punts_to_python(engine): + df = mkdf(10, 10, data_gen_f=f, r_idx_type='dt', 
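              # datetime labels on both axes: the scope inspection below
              # should see them and _maybe_convert_engine should punt to the
              # 'python' engine regardless of the engine requested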
c_idx_type='dt') + index = getattr(df, 'index') + s = Series(np.random.randn(5), index[:5]) + env = Scope(globals(), locals()) + assert_true(_scope_has_series_and_frame_datetime_index(env)) + assert_equal(_maybe_convert_engine(env, engine), 'python') + + +def test_datetime_index_rows_punts_to_python(): + for engine in _engines: + check_datetime_index_rows_punts_to_python(engine) + + +__var_s = randn(10) + + +def check_global_scope(engine): + e = '__var_s * 2' + assert_array_equal(__var_s * 2, pd.eval(e, engine=engine)) + + +def test_global_scope(): + for engine in _engines: + yield check_global_scope, engine + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/computation/tests/test_expressions.py b/pandas/computation/tests/test_expressions.py new file mode 100644 index 0000000000000..f197b8ef7a0ac --- /dev/null +++ b/pandas/computation/tests/test_expressions.py @@ -0,0 +1,157 @@ +# pylint: disable-msg=W0612,E1101 + +import unittest +import operator + +import nose + + +import numpy as np +from numpy.testing import assert_array_equal + +from pandas.core.api import DataFrame +from pandas.computation import expressions as expr + +if not expr._USE_NUMEXPR: + raise nose.SkipTest + +import numexpr as ne + + +_frame = DataFrame(np.random.randn(10000, 4), columns=list('ABCD'), + dtype='float64') +_frame2 = DataFrame(np.random.randn(100, 4), columns=list('ABCD'), + dtype='float64') +_mixed = DataFrame({'A': _frame['A'].copy(), + 'B': _frame['B'].astype('float32'), + 'C': _frame['C'].astype('int64'), + 'D': _frame['D'].astype('int32')}) +_mixed2 = DataFrame({'A': _frame2['A'].copy(), + 'B': _frame2['B'].astype('float32'), + 'C': _frame2['C'].astype('int64'), + 'D': _frame2['D'].astype('int32')}) + + +class TestExpressions(unittest.TestCase): + + _multiprocess_can_split_ = False + + def setUp(self): + self.frame = _frame.copy() + self.frame2 = _frame2.copy() + self.mixed = _mixed.copy() + self.mixed2 = _mixed2.copy() + + def test_invalid(self): + # no op + result = expr._can_use_numexpr(operator.add, None, self.frame, + self.frame, 'evaluate') + self.assertFalse(result) + + # mixed + result = expr._can_use_numexpr( + operator.add, '+', self.mixed, self.frame, 'evaluate') + self.assertFalse(result) + + # min elements + result = expr._can_use_numexpr( + operator.add, '+', self.frame2, self.frame2, 'evaluate') + self.assertFalse(result) + + # ok, we only check on first part of expression + result = expr._can_use_numexpr( + operator.add, '+', self.frame, self.frame2, 'evaluate') + self.assert_(result) + + def test_binary_ops(self): + def testit(): + + for f, f2 in [(self.frame, self.frame2), + (self.mixed, self.mixed2)]: + + for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), + ('div', '/'), ('pow', '**')]: + + op = getattr(operator, op, None) + if op is not None: + result = expr._can_use_numexpr( + op, op_str, f, f, 'evaluate') + self.assert_(result == (not f._is_mixed_type)) + + result = expr.evaluate( + op, op_str, f, f, use_numexpr=True) + expected = expr.evaluate( + op, op_str, f, f, use_numexpr=False) + assert_array_equal(result, expected.values) + + result = expr._can_use_numexpr( + op, op_str, f2, f2, 'evaluate') + self.assertFalse(result) + + expr.set_use_numexpr(False) + testit() + expr.set_use_numexpr(True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + def test_boolean_ops(self): + def testit(): + for f, f2 in [(self.frame, self.frame2), + (self.mixed, 
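                       # each pair holds a large frame (numexpr-eligible when
                       # not mixed-dtype) and a small one below _MIN_ELEMENTS,
                       # so both the accelerated and fallback paths run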
self.mixed2)]: + + f11 = f + f12 = f + 1 + + f21 = f2 + f22 = f2 + 1 + + for op, op_str in [('gt', '>'), ('lt', '<'), ('ge', '>='), + ('le', '<='), ('eq', '=='), ('ne', '!=')]: + + op = getattr(operator, op) + + result = expr._can_use_numexpr( + op, op_str, f11, f12, 'evaluate') + self.assert_(result == (not f11._is_mixed_type)) + + result = expr.evaluate( + op, op_str, f11, f12, use_numexpr=True) + expected = expr.evaluate( + op, op_str, f11, f12, use_numexpr=False) + assert_array_equal(result, expected.values) + + result = expr._can_use_numexpr( + op, op_str, f21, f22, 'evaluate') + self.assertFalse(result) + + expr.set_use_numexpr(False) + testit() + expr.set_use_numexpr(True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + def test_where(self): + def testit(): + for f in [self.frame, self.frame2, self.mixed, self.mixed2]: + + for cond in [True, False]: + + c = np.empty(f.shape, dtype=np.bool_) + c.fill(cond) + result = expr.where(c, f.values, f.values + 1) + expected = np.where(c, f.values, f.values + 1) + assert_array_equal(result, expected) + + expr.set_use_numexpr(False) + testit() + expr.set_use_numexpr(True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 401a7746953cb..a8bb74f86a43e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -35,7 +35,7 @@ create_block_manager_from_arrays, create_block_manager_from_blocks) from pandas.core.series import Series, _radd_compat -import pandas.core.expressions as expressions +import pandas.computation.expressions as expressions from pandas.compat.scipy import scoreatpercentile as _quantile from pandas.util.compat import OrderedDict from pandas.util import py3compat @@ -2652,6 +2652,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, passed MultiIndex level limit : int, default None Maximum size gap to forward or backward fill + fill_value : object, default NA + The value to use to fill in missing data. 
Examples -------- diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f23a89635aaf2..ab29a38760a51 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -14,7 +14,7 @@ import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib -import pandas.core.expressions as expressions +import pandas.computation.expressions as expressions from pandas.tslib import Timestamp from pandas.util import py3compat diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py deleted file mode 100644 index ba0a9926dfa78..0000000000000 --- a/pandas/tests/test_expressions.py +++ /dev/null @@ -1,203 +0,0 @@ -# pylint: disable-msg=W0612,E1101 - -import unittest -import nose - -import operator -from numpy import random, nan -from numpy.random import randn -import numpy as np -from numpy.testing import assert_array_equal - -import pandas as pan -from pandas.core.api import DataFrame, Series, notnull, isnull -from pandas.core import expressions as expr - -from pandas.util.testing import (assert_almost_equal, - assert_series_equal, - assert_frame_equal) -from pandas.util import py3compat - -import pandas.util.testing as tm -import pandas.lib as lib - -from numpy.testing.decorators import slow - -if not expr._USE_NUMEXPR: - raise nose.SkipTest - -_frame = DataFrame(np.random.randn(10000, 4), columns = list('ABCD'), dtype='float64') -_frame2 = DataFrame(np.random.randn(100, 4), columns = list('ABCD'), dtype='float64') -_mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') }) -_mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') }) -_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64') - -class TestExpressions(unittest.TestCase): - - _multiprocess_can_split_ = False - - def setUp(self): - - self.frame = _frame.copy() - self.frame2 = _frame2.copy() - self.mixed = _mixed.copy() - self.mixed2 = _mixed2.copy() - self.integer = _integer.copy() - self._MIN_ELEMENTS = expr._MIN_ELEMENTS - - def tearDown(self): - expr._MIN_ELEMENTS = self._MIN_ELEMENTS - - #TODO: add test for Panel - #TODO: add tests for binary operations - @nose.tools.nottest - def run_arithmetic_test(self, df, assert_func, check_dtype=False): - expr._MIN_ELEMENTS = 0 - operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow'] - if not py3compat.PY3: - operations.append('div') - for arith in operations: - op = getattr(operator, arith) - expr.set_use_numexpr(False) - expected = op(df, df) - expr.set_use_numexpr(True) - result = op(df, df) - try: - if check_dtype: - if arith == 'div': - assert expected.dtype.kind == df.dtype.kind - if arith == 'truediv': - assert expected.dtype.kind == 'f' - assert_func(expected, result) - except Exception: - print("Failed test with operator %r" % op.__name__) - raise - - def test_integer_arithmetic(self): - self.run_arithmetic_test(self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.icol(0), assert_series_equal, - check_dtype=True) - - def test_float_arithemtic(self): - self.run_arithmetic_test(self.frame, assert_frame_equal) - self.run_arithmetic_test(self.frame.icol(0), assert_series_equal, - check_dtype=True) - - def test_mixed_arithmetic(self): - self.run_arithmetic_test(self.mixed, assert_frame_equal) - for col in self.mixed.columns: - 
self.run_arithmetic_test(self.mixed[col], assert_series_equal) - - def test_integer_with_zeros(self): - self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) - self.run_arithmetic_test(self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.icol(0), assert_series_equal) - - def test_invalid(self): - - # no op - result = expr._can_use_numexpr(operator.add, None, self.frame, self.frame, 'evaluate') - self.assert_(result == False) - - # mixed - result = expr._can_use_numexpr(operator.add, '+', self.mixed, self.frame, 'evaluate') - self.assert_(result == False) - - # min elements - result = expr._can_use_numexpr(operator.add, '+', self.frame2, self.frame2, 'evaluate') - self.assert_(result == False) - - # ok, we only check on first part of expression - result = expr._can_use_numexpr(operator.add, '+', self.frame, self.frame2, 'evaluate') - self.assert_(result == True) - - def test_binary_ops(self): - - def testit(): - - for f, f2 in [ (self.frame, self.frame2), (self.mixed, self.mixed2) ]: - - for op, op_str in [('add','+'),('sub','-'),('mul','*'),('div','/'),('pow','**')]: - - op = getattr(operator,op,None) - if op is not None: - result = expr._can_use_numexpr(op, op_str, f, f, 'evaluate') - self.assert_(result == (not f._is_mixed_type)) - - result = expr.evaluate(op, op_str, f, f, use_numexpr=True) - expected = expr.evaluate(op, op_str, f, f, use_numexpr=False) - assert_array_equal(result,expected.values) - - result = expr._can_use_numexpr(op, op_str, f2, f2, 'evaluate') - self.assert_(result == False) - - - expr.set_use_numexpr(False) - testit() - expr.set_use_numexpr(True) - expr.set_numexpr_threads(1) - testit() - expr.set_numexpr_threads() - testit() - - def test_boolean_ops(self): - - - def testit(): - for f, f2 in [ (self.frame, self.frame2), (self.mixed, self.mixed2) ]: - - f11 = f - f12 = f + 1 - - f21 = f2 - f22 = f2 + 1 - - for op, op_str in [('gt','>'),('lt','<'),('ge','>='),('le','<='),('eq','=='),('ne','!=')]: - - op = getattr(operator,op) - - result = expr._can_use_numexpr(op, op_str, f11, f12, 'evaluate') - self.assert_(result == (not f11._is_mixed_type)) - - result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) - expected = expr.evaluate(op, op_str, f11, f12, use_numexpr=False) - assert_array_equal(result,expected.values) - - result = expr._can_use_numexpr(op, op_str, f21, f22, 'evaluate') - self.assert_(result == False) - - expr.set_use_numexpr(False) - testit() - expr.set_use_numexpr(True) - expr.set_numexpr_threads(1) - testit() - expr.set_numexpr_threads() - testit() - - def test_where(self): - - def testit(): - for f in [ self.frame, self.frame2, self.mixed, self.mixed2 ]: - - - for cond in [ True, False ]: - - c = np.empty(f.shape,dtype=np.bool_) - c.fill(cond) - result = expr.where(c, f.values, f.values+1) - expected = np.where(c, f.values, f.values+1) - assert_array_equal(result,expected) - - expr.set_use_numexpr(False) - testit() - expr.set_use_numexpr(True) - expr.set_numexpr_threads(1) - testit() - expr.set_numexpr_threads() - testit() - -if __name__ == '__main__': - # unittest.main() - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/setup.py b/setup.py index 7d59e0f95f0e8..3984dc075d4f7 100755 --- a/setup.py +++ b/setup.py @@ -85,7 +85,7 @@ except ImportError: cython = False -from os.path import splitext, basename, join as pjoin +from os.path import join as pjoin class build_ext(_build_ext): @@ -502,6 +502,7 @@ def pxd(name): maintainer=AUTHOR, 
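      # new subpackages must be registered here or they are silently omitted
      # from installed and built distributions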
packages=['pandas', 'pandas.compat', + 'pandas.computation', 'pandas.core', 'pandas.io', 'pandas.rpy', diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py index 54774344520c9..3f076f9f922a3 100644 --- a/vb_suite/binary_ops.py +++ b/vb_suite/binary_ops.py @@ -21,7 +21,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) @@ -32,7 +32,7 @@ start_date=datetime(2013, 2, 26)) setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_use_numexpr(False) @@ -53,7 +53,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) @@ -63,7 +63,7 @@ start_date=datetime(2013, 2, 26)) setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_use_numexpr(False) @@ -84,7 +84,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) @@ -94,7 +94,7 @@ start_date=datetime(2013, 2, 26)) setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_use_numexpr(False) diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 9f07cc6ed15c3..2edb7548ebeef 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -103,7 +103,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(50000, 100)) df2 = DataFrame(np.random.randn(50000, 100)) expr.set_numexpr_threads(1) @@ -115,7 +115,7 @@ setup = common_setup + """ -import pandas.core.expressions as expr +import pandas.computation.expressions as expr df = DataFrame(np.random.randn(50000, 100)) df2 = DataFrame(np.random.randn(50000, 100)) expr.set_use_numexpr(False) From bcd17b090a32afd43de0a21f3829f281635a8b51 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 21:35:22 -0400 Subject: [PATCH 02/48] ENH/TST: add new instance testing functions and their tests --- pandas/core/common.py | 24 ++++++++++++++ pandas/tests/test_common.py | 65 +++++++++++++++++++++++++++++++++++-- pandas/util/testing.py | 5 ++- 3 files changed, 90 insertions(+), 4 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index ddacb98a2ddf3..4615571c5d86c 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -21,6 +21,7 @@ from pandas.core.config import get_option from pandas.core import array as pa +import pandas as pd # XXX: HACK for NumPy 1.5.1 to suppress warnings try: @@ -1509,6 +1510,29 @@ def is_bool(obj): return isinstance(obj, (bool, np.bool_)) +def is_string(obj): + return isinstance(obj, (basestring, np.str_, np.unicode_)) + + 
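+# thin isinstance wrappers so call sites read uniformly; a sketch of the
+# intended usage (values here purely illustrative):
+#   >>> import pandas as pd
+#   >>> is_series(pd.Series([1, 2]))
+#   True
+#   >>> is_frame(pd.Series([1, 2]))
+#   False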
+def is_series(obj): + return isinstance(obj, pd.Series) + + +def is_frame(obj): + return isinstance(obj, pd.DataFrame) + + +def is_panel(obj): + return isinstance(obj, pd.Panel) + + +def is_pd_obj(obj): + return isinstance(obj, pd.core.generic.PandasObject) + + +def is_ndframe(obj): + return isinstance(obj, pd.core.generic.NDFrame) + def is_integer(obj): return isinstance(obj, (int, long, np.integer)) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index db01545fb3c9d..974e301c5d303 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,20 +1,19 @@ from datetime import datetime -import sys import re import nose import unittest -from pandas import Series, DataFrame, date_range, DatetimeIndex +from pandas import Series, DataFrame, date_range, DatetimeIndex, Panel from pandas.core.common import notnull, isnull import pandas.core.common as com import pandas.util.testing as tm import pandas.core.config as cf import numpy as np +from numpy.random import randn from pandas.tslib import iNaT -from pandas.util import py3compat _multiprocess_can_split_ = True @@ -33,6 +32,7 @@ def __getitem__(self): assert(not is_seq(A())) + def test_notnull(): assert notnull(1.) assert not notnull(None) @@ -98,6 +98,61 @@ def test_isnull_lists(): assert(not result.any()) +def test_is_string(): + class MyString(str): + pass + + class MyUnicode(unicode): + pass + + strings = ('s', np.str_('a'), np.unicode_('unicode_string'), + MyString('a _string blah'), u'asdf', MyUnicode(u'asdf')) + not_strings = [], 1, {}, set(), np.array(['1']), np.array([u'1']) + + for string in strings: + assert com.is_string(string), '{0} is not a string'.format(string) + + for not_string in not_strings: + assert not com.is_string(not_string), ('{0} is a ' + 'string'.format(not_string)) + + +def test_is_frame(): + df = DataFrame(randn(2, 1)) + assert com.is_frame(df) + assert not com.is_frame('s') + + +def test_is_series(): + s = Series(randn(2)) + assert com.is_series(s) + assert not com.is_series(s.values) + + +def test_is_panel(): + p = Panel(randn(2, 3, 4)) + assert com.is_panel(p) + assert not com.is_panel(2) + + +def test_is_pd_obj(): + df = DataFrame(randn(2, 1)) + s = Series(randn(2)) + p = Panel(randn(2, 3, 4)) + for obj in (df, s, p): + assert com.is_pd_obj(obj) + assert not com.is_pd_obj(obj.values) + + +def test_is_ndframe(): + df = DataFrame(randn(2, 1)) + p = Panel(randn(2, 3, 4)) + # should add series after @jreback's ndframe to series pr + for obj in (df, p): + assert com.is_ndframe(obj) + assert not com.is_ndframe(obj.values) + + def test_isnull_datetime(): assert (not isnull(datetime.now())) assert notnull(datetime.now()) @@ -112,11 +167,13 @@ def test_isnull_datetime(): assert(mask[0]) assert(not mask[1:].any()) + def test_datetimeindex_from_empty_datetime64_array(): for unit in [ 'ms', 'us', 'ns' ]: idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) assert(len(idx) == 0) + def test_any_none(): assert(com._any_none(1, 2, 3, None)) assert(not com._any_none(1, 2, 3, 4)) @@ -266,6 +323,7 @@ def test_ensure_int32(): result = com._ensure_int32(values) assert(result.dtype == np.int32) + def test_ensure_platform_int(): # verify that when we create certain types of indices @@ -748,6 +806,7 @@ def test_2d_datetime64(self): expected[:, [2, 4]] = datetime(2007, 1, 1) tm.assert_almost_equal(result, expected) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/util/testing.py 
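# A sketch of the remaining predicates working together, mirroring the
# tests above (illustrative only):
import numpy as np
import pandas as pd
import pandas.core.common as com

df = pd.DataFrame(np.random.randn(2, 1))
assert com.is_frame(df)
assert com.is_pd_obj(df) and com.is_ndframe(df)
assert not com.is_pd_obj(df.values)  # a raw ndarray is not a pandas object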
b/pandas/util/testing.py index 47bde4ecb32a7..e1b2950b5c8d3 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -17,7 +17,7 @@ from urllib2 import urlopen from distutils.version import LooseVersion -from numpy.random import randn +from numpy.random import randn, rand import numpy as np from pandas.core.common import isnull, _is_sequence @@ -45,6 +45,9 @@ _RAISE_NETWORK_ERROR_DEFAULT = False +def randbool(size=(), p=0.5): + return rand(*size) <= p + def rands(n): choices = string.ascii_letters + string.digits return ''.join(random.choice(choices) for _ in xrange(n)) From 81bacd1d9a8dbec90cbdf3d92d45b3180d0eeee2 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 21:58:28 -0400 Subject: [PATCH 03/48] BUG: prevent certain index types from joining with DatetimeIndex --- pandas/tseries/index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 7fdb6d9d2603d..4c75ef66feb08 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -912,7 +912,8 @@ def join(self, other, how='left', level=None, return_indexers=False): See Index.join """ if (not isinstance(other, DatetimeIndex) and len(other) > 0 and - other.inferred_type != 'mixed-integer'): + other.inferred_type not in ('floating', 'mixed-integer', + 'mixed-integer-float', 'mixed')): try: other = DatetimeIndex(other) except TypeError: From e380271278cba82d669cd07312d4f37106a4c47d Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 15 Jun 2013 23:26:01 -0400 Subject: [PATCH 04/48] TST/ENH: add 2d bare numpy array and nan support --- pandas/computation/engines.py | 60 +++++++++++++++++---------- pandas/computation/ops.py | 4 +- pandas/computation/tests/test_eval.py | 35 ++++++++++++---- 3 files changed, 70 insertions(+), 29 deletions(-) diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 0eb9875b85549..5bb43efec3e15 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -1,4 +1,5 @@ import abc +import functools from functools import partial from itertools import izip @@ -66,25 +67,42 @@ def _maybe_promote_shape(values, naxes): return values[tuple(axes_slice)] -def _align_core(terms): - # need to ensure that terms is not an iterator - terms = list(terms) +def _any_pandas_objects(terms): + """Check a sequence of terms for instances of PandasObject.""" + return any(com.is_pd_obj(term) for term in terms) + + +def _filter_special_cases(f): + @functools.wraps(f) + def wrapper(terms): + # need to ensure that terms is not an iterator + terms = list(terms) + + ## special cases - ## special cases + # single unary operand + if len(terms) == 1: + return _align_core_single_unary_op(terms[0]) - # single unary operand - if len(terms) == 1: - return _align_core_single_unary_op(terms[0]) - # only scalars - elif all(np.isscalar(term) for term in terms): - return np.result_type(*terms), terms, None + # only scalars + elif all(np.isscalar(term) for term in terms): + return np.result_type(*terms), terms, None - # single dim ndarrays - all_has_size = all(hasattr(term, 'size') for term in terms) - if (all_has_size and all(term.size == 1 for term in terms)): - return np.result_type(*terms), terms, None + # single element ndarrays + all_has_size = all(hasattr(term, 'size') for term in terms) + if (all_has_size and all(term.size == 1 for term in terms)): + return np.result_type(*terms), terms, None - # made it past the special cases + # no pandas so just punt to the evaluator + if not 
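# Seen from eval's side, the special-case filter above means scalar-only
# expressions never reach the alignment machinery (illustrative sketch;
# assumes numexpr is installed):
import pandas as pd

assert pd.eval('1 + 2', engine='numexpr') == 3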
_any_pandas_objects(terms): + return np.result_type(*terms), terms, None + + return f(terms) + return wrapper + + +@_filter_special_cases +def _align_core(terms): term_index = [i for i, term in enumerate(terms) if hasattr(term, 'axes')] term_dims = [terms[i].ndim for i in term_index] ndims = pd.Series(dict(zip(term_index, term_dims))) @@ -145,8 +163,8 @@ def _filter_terms(flat): # literals are not names and names are not literals, by definition if literals_set & names_set: - raise AssertionError('literals cannot be names and names cannot be ' - 'literals') + raise ValueError('literals cannot be names and names cannot be ' + 'literals') return names, literals @@ -154,10 +172,10 @@ def _align(terms, env): # flatten the parse tree (a nested list) flat = list(flatten(terms)) + # separate names and literals names, literals = _filter_terms(flat) - # given an expression consisting of literals - if not names: + if not names: # only literals so just promote to a common type return np.result_type(*literals).type, None # get the variables out @@ -165,13 +183,13 @@ def _align(terms, env): resolved = map(resolve_in_env, names) # if all resolved variables are numeric scalars - if all(map(np.isscalar, resolved)): + if all(np.isscalar(rsv) for rsv in resolved): return np.result_type(*resolved).type, None # perform the main alignment typ, resolved, axes = _align_core(resolved) - # put them back in the symbol table + # put the aligned arrays back in the table _update_names(env, dict(izip(names, resolved))) # we need this to reconstruct things after evaluation since we CANNOT diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index fb1965f45c52b..f79acc412023a 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -123,7 +123,7 @@ def __call__(self, env): except TypeError: left = self.lhs - # recursve over the right nodes + # recurse over the right nodes try: right = self.rhs(env) except TypeError: @@ -166,6 +166,8 @@ def __init__(self, op, operand): def __call__(self, env): operand = self.operand + + # recurse if operand is an Op try: operand = self.operand(env) except TypeError: diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 2d7bf4392cfea..cb52025e45df1 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import unittest import itertools from itertools import product @@ -7,12 +8,13 @@ from nose.tools import assert_raises, assert_tuple_equal, assert_equal from nose.tools import assert_true -from numpy.random import randn +from numpy.random import randn, rand import numpy as np from numpy.testing import assert_array_equal from numpy.testing.decorators import slow import pandas as pd +from pandas.core import common as com from pandas import DataFrame, Series from pandas.util.testing import makeCustomDataframe as mkdf from pandas.computation.engines import (_engines, _align_core, @@ -85,8 +87,14 @@ def _eval_bin_and_unary(unary, lhs, arith1, rhs): return unop(binop(lhs, rhs)) +def _series_and_2d_ndarray(lhs, rhs): + return (com.is_series(lhs) and isinstance(rhs, np.ndarray) and rhs.ndim > 1 + or com.is_series(rhs) and isinstance(lhs, np.ndarray) and lhs.ndim + > 1) + + # Smoke testing -class TestBasicEval(object): +class TestBasicEval(unittest.TestCase): @classmethod def setUpClass(self): @@ -100,10 +108,14 @@ def set_current_engine(self): self.engine = 'numexpr' def setup_data(self): + nan_df = DataFrame(rand(10, 5)) + nan_df[nan_df > 0.5] = 
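# The operand matrix above sweeps expressions like the following, where
# a NaN-laced frame meets a NaN-laced series (illustrative sketch):
import numpy as np
from numpy.random import rand
import pandas as pd
from pandas import DataFrame, Series

nan_df = DataFrame(rand(10, 5))
nan_df[nan_df > 0.5] = np.nan
s = Series([1, 2, np.nan, np.nan, 5])
res = pd.eval('nan_df + s', engine='numexpr')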
np.nan self.lhses = (DataFrame(randn(10, 5)), Series(randn(5)), randn(), - np.float64(randn())) + np.float64(randn()), randn(10, 5), randn(5), np.nan, + Series([1, 2, np.nan, np.nan, 5]), nan_df) self.rhses = (DataFrame(randn(10, 5)), Series(randn(5)), randn(), - np.float64(randn())) + np.float64(randn()), randn(10, 5), randn(5), np.nan, + Series([1, 2, np.nan, np.nan, 5]), nan_df) def setUp(self): try: @@ -163,9 +175,14 @@ def _create_cmp_op_t(self, lhs, cmp1, rhs, binop, cmp2): ex = '(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)'.format(cmp1=cmp1, binop=binop, cmp2=cmp2) - expected = _eval_from_expr(lhs, cmp1, rhs, binop, cmp2) - result = pd.eval(ex, engine=self.engine) - assert_array_equal(result, expected) + if _series_and_2d_ndarray(lhs, rhs): + self.assertRaises(Exception, _eval_from_expr, lhs, cmp1, rhs, + binop, cmp2) + self.assertRaises(Exception, pd.eval, ex, engine=self.engine) + else: + expected = _eval_from_expr(lhs, cmp1, rhs, binop, cmp2) + result = pd.eval(ex, engine=self.engine) + assert_array_equal(result, expected) def _create_simple_cmp_op_t(self, lhs, rhs, cmp1): ex = 'lhs {0} rhs'.format(cmp1) @@ -534,6 +551,10 @@ def test_datetime_index_rows_punts_to_python(): check_datetime_index_rows_punts_to_python(engine) +def check_truediv(engine): + s = randn(10) + + __var_s = randn(10) From 99a3d280d86d6b6141086eef2fde29d979b9dc4f Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 17 Jun 2013 07:37:46 -0400 Subject: [PATCH 05/48] ENH: add modulus support --- pandas/computation/eval.py | 2 +- pandas/computation/expr.py | 13 +++++++------ pandas/computation/ops.py | 22 ++++++++++++++++++++-- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 21348f221bc99..64345e8d3a143 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -57,7 +57,7 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, engine = _maybe_convert_engine(env, engine) # parse the expression - parsed_expr = Expr(expr, engine, truediv) + parsed_expr = Expr(expr, engine, env, truediv) # choose the engine eng = _engines[engine] diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 105c0a020a2ad..f6d4ca39788ab 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -1,7 +1,7 @@ import ast from functools import partial -from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops +from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops, Mod from pandas.computation.ops import _cmp_ops_syms, _bool_ops_syms from pandas.computation.ops import _arith_ops_syms, _unary_ops_syms @@ -15,14 +15,14 @@ class ExprVisitor(ast.NodeVisitor): """ bin_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms bin_op_nodes = ('Gt', 'Lt', 'GtE', 'LtE', 'Eq', 'NotEq', 'BitAnd', 'BitOr', - 'Add', 'Sub', 'Mult', 'Div', 'Pow', 'FloorDiv') + 'Add', 'Sub', 'Mult', 'Div', 'Pow', 'FloorDiv', 'Mod') bin_op_nodes_map = dict(zip(bin_ops, bin_op_nodes)) unary_ops = _unary_ops_syms unary_op_nodes = 'UAdd', 'USub', 'Invert' unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes)) - def __init__(self): + def __init__(self, env): for bin_op in self.bin_ops: setattr(self, 'visit_{0}'.format(self.bin_op_nodes_map[bin_op]), lambda node, bin_op=bin_op: partial(BinOp, bin_op)) @@ -31,6 +31,7 @@ def __init__(self): setattr(self, 'visit_{0}'.format(self.unary_op_nodes_map[unary_op]), lambda node, unary_op=unary_op: partial(UnaryOp, unary_op)) + self.env = env def visit(self, node): if not 
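# The intended modulus semantics: operands are cast to float before
# evaluation, so results are compared with allclose rather than exact
# equality (illustrative sketch; the casting fix itself lands a few
# patches later in this series):
import numpy as np
import pandas as pd

x = np.arange(10.0)
res = pd.eval('x % 3', engine='numexpr')
np.testing.assert_allclose(res, x % 3)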
(isinstance(node, ast.AST) or isinstance(node, basestring)): @@ -91,15 +92,15 @@ def visit_Attribute(self, node): raise NotImplementedError("attribute access is not yet supported") def visit_Mod(self, node): - raise NotImplementedError("modulo operator not yet supported") + return partial(Mod, env=self.env) class Expr(object): """Expr object for pandas """ - def __init__(self, expr, engine, truediv): + def __init__(self, expr, engine, env, truediv): self.expr = expr - self._visitor = ExprVisitor() + self._visitor = ExprVisitor(env) self.terms = self.parse() self.engine = engine self.truediv = truediv diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index f79acc412023a..f81844d787a5a 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -1,6 +1,7 @@ import operator as op from functools import partial +import numpy as np from pandas.util.py3compat import PY3 @@ -74,9 +75,9 @@ def name(self): _bool_ops_funcs = op.and_, op.or_ _bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs)) -_arith_ops_syms = '+', '-', '*', '/', '**', '//' +_arith_ops_syms = '+', '-', '*', '/', '**', '//', '%' _arith_ops_funcs = (op.add, op.sub, op.mul, op.truediv if PY3 else op.div, - op.pow, op.floordiv) + op.pow, op.floordiv, op.mod) _arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs)) _binary_ops_dict = {} @@ -85,6 +86,17 @@ def name(self): _binary_ops_dict.update(d) +def _cast(terms, env, dtype): + resolver = partial(_resolve_name, env) + updater = partial(_update_name, env) + for term in terms: + t = resolver(term) + try: + new_value = t.astype(dtype) + except AttributeError: + new_value = dtype.type(t) + updater(term, t) + class BinOp(Op): """Hold a binary operator and its operands @@ -145,6 +157,12 @@ def __call__(self, env): return res +class Mod(BinOp): + def __init__(self, lhs, rhs, env=None): + super(Mod, self).__init__('%', lhs, rhs) + _cast(env, (lhs, rhs), np.float_) + + _unary_ops_syms = '+', '-', '~' _unary_ops_funcs = op.pos, op.neg, op.invert _unary_ops_dict = dict(zip(_unary_ops_syms, _unary_ops_funcs)) From 4db95fe90b529e2f25294acfad0408cdfe60f8ec Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 17 Jun 2013 08:02:44 -0400 Subject: [PATCH 06/48] TST: add failing modulus tests --- pandas/computation/tests/test_eval.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index cb52025e45df1..4e062d6a4e99b 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -10,7 +10,7 @@ from numpy.random import randn, rand import numpy as np -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_equal, assert_allclose from numpy.testing.decorators import slow import pandas as pd @@ -212,7 +212,11 @@ def _create_arith_op_t(self, lhs, arith1, rhs): if arith1 != '//': expected = _eval_single_bin(lhs, arith1, rhs, engine_has_neg_frac(self.engine)) - assert_array_equal(result, expected) + # roundoff error with modulus + if arith1 == '%': + assert_allclose(result, expected) + else: + assert_array_equal(result, expected) # sanity check on recursive parsing try: @@ -243,7 +247,12 @@ def _create_arith_op_t(self, lhs, arith1, rhs): pass if arith1 != '//': expected = self.ne.evaluate('nlhs {0} ghs'.format(arith1)) - assert_array_equal(result, expected) + + # roundoff error with modulus + if arith1 == '%': + assert_allclose(result, expected) + else: + assert_array_equal(result, 
expected) def _create_invert_op_t(self, lhs, cmp1, rhs): # simple @@ -551,6 +560,11 @@ def test_datetime_index_rows_punts_to_python(): check_datetime_index_rows_punts_to_python(engine) +def test_truediv(): + for engine in _engines: + check_truediv(engine) + + def check_truediv(engine): s = randn(10) From 6000c89fe9af20c974ef9b5ff19ea13c4f49178a Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 17 Jun 2013 20:34:24 -0400 Subject: [PATCH 07/48] CLN: use format string for unicode --- pandas/computation/engines.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 5bb43efec3e15..11843ffef1705 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -275,7 +275,7 @@ def __init__(self, expr): def convert(self): """Return a string""" - return str(self.expr) + return '%s' % self.expr def _evaluate(self, env): import numexpr as ne From c25a1d4b0853578183e75d341aaab051941bdce7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Mon, 17 Jun 2013 20:35:45 -0400 Subject: [PATCH 08/48] CLN: remove engine detection and manip for datetimes --- pandas/computation/eval.py | 34 ++----------------- pandas/computation/expressions.py | 48 +++++++++++++-------------- pandas/computation/ops.py | 35 ++++++++++++++++--- pandas/computation/tests/test_eval.py | 23 +++++++++---- 4 files changed, 74 insertions(+), 66 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 64345e8d3a143..298554005d6ed 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -2,40 +2,13 @@ import sys import numbers -import collections -import itertools import numpy as np -Scope = collections.namedtuple('Scope', 'globals locals') - -import pandas.core.common as com -from pandas.computation.expr import Expr +from pandas.computation.expr import Expr, Scope from pandas.computation.engines import _engines -def _scope_has_series_and_frame_datetime_index(env): - from pandas import DatetimeIndex - series_index = frame_index = 0 - - for v in itertools.chain(env.locals.itervalues(), - env.globals.itervalues()): - series_index += com.is_series(v) and isinstance(v.index, DatetimeIndex) - frame_index += com.is_frame(v) and isinstance(v.index, DatetimeIndex) - return series_index, frame_index - - -def _maybe_convert_engine(env, engine): - assert isinstance(env, Scope), 'environment must be an instance of Scope' - assert isinstance(engine, basestring), 'engine name must be a string' - - ret = engine - - if all(_scope_has_series_and_frame_datetime_index(env)): - ret = 'python' - return ret - - def eval(expr, engine='numexpr', truediv=True, local_dict=None, global_dict=None): # make sure we're passed a valid engine @@ -44,7 +17,8 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, ' {1}'.format(_engines.keys())) # 1 up in the call stack for locals/globals; see the documentation for the - # inspect module for why you must decrease the refcount of frame + # inspect module for why you must decrease the refcount of frame at all + # costs frame = sys._getframe(1) try: @@ -54,8 +28,6 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, # shallow copy the scope so we don't overwrite everything env = Scope(gbl.copy(), lcl.copy()) - engine = _maybe_convert_engine(env, engine) - # parse the expression parsed_expr = Expr(expr, engine, env, truediv) diff --git a/pandas/computation/expressions.py b/pandas/computation/expressions.py index e1551f9b0548e..0c13a50d15618 100644 --- 
a/pandas/computation/expressions.py +++ b/pandas/computation/expressions.py @@ -7,6 +7,7 @@ """ import numpy as np +import pandas.core.common as com try: import numexpr as ne @@ -46,13 +47,10 @@ def set_use_numexpr(v=True): def set_numexpr_threads(n=None): # if we are using numexpr, set the threads to n # otherwise reset - try: - if _NUMEXPR_INSTALLED and _USE_NUMEXPR: - if n is None: - n = ne.detect_number_of_cores() - ne.set_num_threads(n) - except: - pass + if _NUMEXPR_INSTALLED and _USE_NUMEXPR: + if n is None: + n = ne.detect_number_of_cores() + ne.set_num_threads(n) def _evaluate_standard(op, op_str, a, b, raise_on_error=True, **eval_kwargs): @@ -84,7 +82,8 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): return False -def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): + +def _evaluate_numexpr(op, op_str, a, b, raise_on_error=False, **eval_kwargs): result = None if _can_use_numexpr(op, op_str, a, b, 'evaluate'): @@ -94,15 +93,13 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): a_value = a_value.values if hasattr(b_value, 'values'): b_value = b_value.values - result = ne.evaluate('a_value %s b_value' % op_str, - local_dict={ 'a_value' : a_value, - 'b_value' : b_value }, + result = ne.evaluate('a_value %s b_value' % op_str, + local_dict={'a_value': a_value, + 'b_value': b_value}, casting='safe', **eval_kwargs) - except (ValueError), detail: - if 'unknown type object' in str(detail): - pass - except (Exception), detail: - if raise_on_error: + except Exception as detail: + if ('unknown type object' not in com.pprint_thing(detail) and + raise_on_error): raise if result is None: @@ -128,17 +125,15 @@ def _where_numexpr(cond, a, b, raise_on_error=False): a_value = a_value.values if hasattr(b_value, 'values'): b_value = b_value.values - result = ne.evaluate('where(cond_value,a_value,b_value)', + result = ne.evaluate('where(cond_value, a_value, b_value)', local_dict={'cond_value': cond_value, 'a_value': a_value, 'b_value': b_value}, casting='safe') - except (ValueError), detail: - if 'unknown type object' in str(detail): - pass - except (Exception), detail: - if raise_on_error: - raise TypeError(str(detail)) + except Exception as detail: + if ('unknown type object' not in com.pprint_thing(detail) and + raise_on_error): + raise if result is None: result = _where_standard(cond, a, b, raise_on_error) @@ -149,7 +144,9 @@ def _where_numexpr(cond, a, b, raise_on_error=False): # turn myself on set_use_numexpr(True) -def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kwargs): + +def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, + **eval_kwargs): """ evaluate and return the expression of the op on a and b Parameters @@ -166,7 +163,8 @@ def evaluate(op, op_str, a, b, raise_on_error=False, use_numexpr=True, **eval_kw """ if use_numexpr: - return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, **eval_kwargs) + return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error, + **eval_kwargs) return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index f81844d787a5a..1a6d3fd1672ba 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -52,6 +52,32 @@ def _update_names(env, mapping): updater(key, value) +class Term(object): + def __init__(self, value, name, env): + self.value = value + self.name = name + self.env = env + self.type = type(value) + + def __iter__(self): + yield 
self.value + raise StopIteration + + def __str__(self): + return '{0}({1!r})'.format(self.__class__.__name__, self.name) + + __repr__ = __str__ + + def update(self, env, value): + _update_name(self.env, self.name, value) + self.value = value + + +class Constant(Term): + def __init__(self, value, env): + super(Constant, self).__init__(value, value, env) + + class Op(object): """Hold an operator of unknown arity """ @@ -89,13 +115,14 @@ def name(self): def _cast(terms, env, dtype): resolver = partial(_resolve_name, env) updater = partial(_update_name, env) + dt = np.dtype(dtype) for term in terms: t = resolver(term) try: - new_value = t.astype(dtype) + new_value = t.astype(dt) except AttributeError: - new_value = dtype.type(t) - updater(term, t) + new_value = dt.type(t) + updater(term, new_value) class BinOp(Op): """Hold a binary operator and its operands @@ -160,7 +187,7 @@ def __call__(self, env): class Mod(BinOp): def __init__(self, lhs, rhs, env=None): super(Mod, self).__init__('%', lhs, rhs) - _cast(env, (lhs, rhs), np.float_) + _cast((lhs, rhs), env, np.float_) _unary_ops_syms = '+', '-', '~' diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 4e062d6a4e99b..417fb106f90fa 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -5,8 +5,8 @@ from itertools import product import nose -from nose.tools import assert_raises, assert_tuple_equal, assert_equal -from nose.tools import assert_true +from nose.tools import assert_raises, assert_tuple_equal +from nose.tools import assert_true, assert_false from numpy.random import randn, rand import numpy as np @@ -23,8 +23,6 @@ import pandas.computation.expr as expr from pandas.computation.expressions import _USE_NUMEXPR from pandas.computation.eval import Scope -from pandas.computation.eval import _scope_has_series_and_frame_datetime_index -from pandas.computation.eval import _maybe_convert_engine from pandas.util.testing import assert_frame_equal, randbool @@ -551,8 +549,6 @@ def check_datetime_index_rows_punts_to_python(engine): index = getattr(df, 'index') s = Series(np.random.randn(5), index[:5]) env = Scope(globals(), locals()) - assert_true(_scope_has_series_and_frame_datetime_index(env)) - assert_equal(_maybe_convert_engine(env, engine), 'python') def test_datetime_index_rows_punts_to_python(): @@ -582,6 +578,21 @@ def test_global_scope(): yield check_global_scope, engine +def check_is_expr(engine): + s = 1 + valid = 's + 1' + invalid = 's +' + assert_true(expr.isexpr(valid, check_names=True)) + assert_false(expr.isexpr(valid, check_names=False)) + assert_false(expr.isexpr(invalid, check_names=False)) + assert_false(expr.isexpr(invalid, check_names=True)) + + +def test_is_expr(): + for engine in _engines: + check_is_expr(engine) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 1132bc40843626fd2eb1afbb6755490a79924337 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 19 Jun 2013 21:40:11 -0400 Subject: [PATCH 09/48] CLN/ENH: add new interface to encapsulate Terms and Constants --- pandas/computation/engines.py | 100 +++++++++++++++------------------- pandas/computation/expr.py | 38 ++++++++++--- pandas/computation/ops.py | 72 +++++++++++++----------- 3 files changed, 112 insertions(+), 98 deletions(-) diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 11843ffef1705..4ebb4a15fdee4 100644 --- a/pandas/computation/engines.py +++ 
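# Term and Constant pair a resolved value with its name and environment
# so that alignment can write results back in place. A minimal sketch,
# using the interface as of this refactor:
from pandas.computation.expr import Scope
from pandas.computation.ops import Term, Constant

env = Scope(globals(), locals())
t = Term(42, 'answer', env)  # a named value
c = Constant(3.14, env)      # a literal: its name is the value itself
assert t.isscalar and c.isscalar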
b/pandas/computation/engines.py @@ -1,27 +1,26 @@ import abc -import functools -from functools import partial +from functools import partial, wraps from itertools import izip import numpy as np import pandas as pd import pandas.core.common as com -from pandas.computation.ops import _resolve_name, _update_names +from pandas.computation.ops import is_const from pandas.computation.common import flatten def _align_core_single_unary_op(term): - if isinstance(term, np.ndarray) and not com.is_series(term): - typ = np.asanyarray + if isinstance(term.value, np.ndarray) and not com.is_series(term.value): + typ = partial(np.asanyarray, dtype=term.value.dtype) else: - typ = type(term) - ret = typ, [term] + typ = type(term.value) + ret = typ, - if not hasattr(term, 'axes'): + if not hasattr(term.value, 'axes'): ret += None, else: - ret += _zip_axes_from_type(typ, term.axes), + ret += _zip_axes_from_type(typ, term.value.axes), return ret @@ -69,33 +68,28 @@ def _maybe_promote_shape(values, naxes): def _any_pandas_objects(terms): """Check a sequence of terms for instances of PandasObject.""" - return any(com.is_pd_obj(term) for term in terms) + return any(com.is_pd_obj(term.value) for term in terms) def _filter_special_cases(f): - @functools.wraps(f) + @wraps(f) def wrapper(terms): - # need to ensure that terms is not an iterator - terms = list(terms) - - ## special cases - # single unary operand if len(terms) == 1: return _align_core_single_unary_op(terms[0]) # only scalars - elif all(np.isscalar(term) for term in terms): - return np.result_type(*terms), terms, None + elif all(term.isscalar for term in terms): + return np.result_type(*(term.value for term in terms)), None # single element ndarrays - all_has_size = all(hasattr(term, 'size') for term in terms) - if (all_has_size and all(term.size == 1 for term in terms)): - return np.result_type(*terms), terms, None + all_has_size = all(hasattr(term.value, 'size') for term in terms) + if (all_has_size and all(term.value.size == 1 for term in terms)): + return np.result_type(*(term.value for term in terms)), None # no pandas so just punt to the evaluator if not _any_pandas_objects(terms): - return np.result_type(*terms), terms, None + return np.result_type(*(term.value for term in terms)), None return f(terms) return wrapper @@ -103,27 +97,28 @@ def wrapper(terms): @_filter_special_cases def _align_core(terms): - term_index = [i for i, term in enumerate(terms) if hasattr(term, 'axes')] - term_dims = [terms[i].ndim for i in term_index] + term_index = [i for i, term in enumerate(terms) if hasattr(term.value, + 'axes')] + term_dims = [terms[i].value.ndim for i in term_index] ndims = pd.Series(dict(zip(term_index, term_dims))) # initial axes are the axes of the largest-axis'd term - biggest = terms[ndims.idxmax()] + biggest = terms[ndims.idxmax()].value typ = biggest._constructor axes = biggest.axes naxes = len(axes) for i in term_index: - for axis, items in enumerate(terms[i].axes): - if com.is_series(terms[i]) and naxes > 1: - axes[naxes - 1] = axes[naxes - 1].join(terms[i].index, + for axis, items in enumerate(terms[i].value.axes): + if com.is_series(terms[i].value) and naxes > 1: + axes[naxes - 1] = axes[naxes - 1].join(terms[i].value.index, how='outer') else: axes[axis] = axes[axis].join(items, how='outer') for i, ndim in ndims.iteritems(): for axis, items in izip(xrange(ndim), axes): - ti = terms[i] # needed here because we modify it in the inner loop + ti = terms[i].value # needed here because we modify it in the inner loop if hasattr(ti, 
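# What the alignment above buys: frame operands are outer-joined on
# their axes before the engine ever sees raw ndarrays, so eval matches
# plain pandas arithmetic (illustrative sketch):
import pandas as pd
from numpy.random import randn

a = pd.DataFrame(randn(4, 2), index=list('abcd'))
b = pd.DataFrame(randn(3, 2), index=list('bcd'))
res = pd.eval('a + b', engine='numexpr')
assert res.index.equals(a.index.union(b.index))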
'reindex_axis'): transpose = com.is_series(ti) and naxes > 1 @@ -138,31 +133,31 @@ def _align_core(terms): else: r = f() - terms[i] = r + terms[i].update(r) - res = _maybe_promote_shape(terms[i].T if transpose else terms[i], - naxes) + res = _maybe_promote_shape(terms[i].value.T if transpose else + terms[i].value, naxes) res = res.T if transpose else res try: - terms[i] = res.values + v = res.values except AttributeError: - terms[i] = res + v = res + terms[i].update(v) - return typ, terms, _zip_axes_from_type(typ, axes) + return typ, _zip_axes_from_type(typ, axes) def _filter_terms(flat): # numeric literals - literals = filter(lambda string: not com.is_string(string), flat) - literals_set = set(literals) + literals = set(filter(is_const, flat)) # these are strings which are variable names - names = filter(com.is_string, flat) - names_set = set(names) + names = set(flat) - literals - # literals are not names and names are not literals, by definition - if literals_set & names_set: + # literals are not names and names are not literals, so intersection should + # be empty + if literals & names: raise ValueError('literals cannot be names and names cannot be ' 'literals') return names, literals @@ -170,30 +165,20 @@ def _filter_terms(flat): def _align(terms, env): # flatten the parse tree (a nested list) - flat = list(flatten(terms)) + terms = list(flatten(terms)) # separate names and literals - names, literals = _filter_terms(flat) + names, literals = _filter_terms(terms) if not names: # only literals so just promote to a common type return np.result_type(*literals).type, None - # get the variables out - resolve_in_env = partial(_resolve_name, env) - resolved = map(resolve_in_env, names) - # if all resolved variables are numeric scalars - if all(np.isscalar(rsv) for rsv in resolved): - return np.result_type(*resolved).type, None + if all(term.isscalar for term in terms): + return np.result_type(*(term.value for term in terms)).type, None # perform the main alignment - typ, resolved, axes = _align_core(resolved) - - # put the aligned arrays back in the table - _update_names(env, dict(izip(names, resolved))) - - # we need this to reconstruct things after evaluation since we CANNOT - # depend on the array interface + typ, axes = _align_core(terms) return typ, axes @@ -222,7 +207,8 @@ def _reconstruct_object(typ, obj, axes): except AttributeError: pass - if typ != np.asanyarray and issubclass(typ, pd.core.generic.PandasObject): + if (not isinstance(typ, partial) and + issubclass(typ, pd.core.generic.PandasObject)): return typ(obj, **axes) ret_value = typ(obj) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index f6d4ca39788ab..f0ed6b5de9ed6 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -1,9 +1,16 @@ import ast +import sys from functools import partial +import collections + from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops, Mod from pandas.computation.ops import _cmp_ops_syms, _bool_ops_syms from pandas.computation.ops import _arith_ops_syms, _unary_ops_syms +from pandas.computation.ops import _resolve_name, Term, Constant + + +Scope = collections.namedtuple('Scope', 'globals locals') class ExprParserError(Exception): @@ -65,10 +72,11 @@ def visit_UnaryOp(self, node): return op(self.visit(node.operand)) def visit_Name(self, node): - return node.id + name = node.id + return Term(_resolve_name(self.env, name), name, self.env) def visit_Num(self, node): - return node.n + return Constant(node.n, self.env) def visit_Compare(self, 
node): ops = node.ops @@ -92,19 +100,29 @@ def visit_Attribute(self, node): raise NotImplementedError("attribute access is not yet supported") def visit_Mod(self, node): - return partial(Mod, env=self.env) + return Mod class Expr(object): """Expr object for pandas """ - def __init__(self, expr, engine, env, truediv): + def __init__(self, expr, engine='numexpr', env=None, truediv=True): self.expr = expr - self._visitor = ExprVisitor(env) + self.env = env or self._get_calling_scope() + self._visitor = ExprVisitor(self.env) self.terms = self.parse() self.engine = engine self.truediv = truediv + def _get_calling_scope(self): + frame = sys._getframe(1) + gbl, lcl = frame.f_globals, frame.f_locals + + try: + return Scope(gbl, lcl) + finally: + del frame + def __call__(self, env): env.locals['truediv'] = self.truediv return self.terms(env) @@ -123,14 +141,16 @@ def parse(self): raise e return visited - def align(self, env): + def align(self): """align a set of Terms""" - return self.terms.align(env) + return self.terms.align(self.env) -def isexpr(s): +def isexpr(s, check_names=True): try: - Expr(s, engine=None) + Expr(s) except SyntaxError: return False + except NameError: + return not check_names return True diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 1a6d3fd1672ba..8c66fd0d122d5 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -3,6 +3,7 @@ import numpy as np from pandas.util.py3compat import PY3 +import pandas.core.common as com _reductions = 'sum', 'prod' @@ -59,19 +60,19 @@ def __init__(self, value, name, env): self.env = env self.type = type(value) - def __iter__(self): - yield self.value - raise StopIteration - def __str__(self): return '{0}({1!r})'.format(self.__class__.__name__, self.name) __repr__ = __str__ - def update(self, env, value): + def update(self, value): _update_name(self.env, self.name, value) self.value = value + @property + def isscalar(self): + return np.isscalar(self.value) + class Constant(Term): def __init__(self, value, env): @@ -112,17 +113,28 @@ def name(self): _binary_ops_dict.update(d) -def _cast(terms, env, dtype): - resolver = partial(_resolve_name, env) - updater = partial(_update_name, env) +def _cast(terms, dtype): dt = np.dtype(dtype) for term in terms: - t = resolver(term) + # cast all the way down the tree since operands must be try: - new_value = t.astype(dt) + _cast(term.operands, dtype) except AttributeError: - new_value = dt.type(t) - updater(term, new_value) + # we've bottomed out so cast + try: + new_value = term.value.astype(dt) + except AttributeError: + new_value = dt.type(term.value) + term.update(new_value) + + +def is_term(obj): + return isinstance(obj, Term) + + +def is_const(obj): + return isinstance(obj, Constant) + class BinOp(Op): """Hold a binary operator and its operands @@ -146,8 +158,9 @@ def __init__(self, op, lhs, rhs): ' operators are {1}'.format(op, keys)) def __repr__(self): - return '{0}(op={1!r}, lhs={2!r}, rhs={3!r})'.format(self.name, self.op, - self.lhs, self.rhs) + return com.pprint_thing('{0}(op={1!r}, lhs={2!r}, ' + 'rhs={3!r})'.format(self.name, self.op, + self.lhs, self.rhs)) __str__ = __repr__ @@ -169,25 +182,22 @@ def __call__(self, env): right = self.rhs # base cases - if not (isinstance(left, basestring) or isinstance(right, basestring)): + if is_term(left) and is_term(right): + res = self.func(left.value, right.value) + elif not is_term(left) and is_term(right): + res = self.func(left, right.value) + elif is_term(left) and not is_term(right): + res = 
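# isexpr separates syntax errors from merely unresolvable names; a
# minimal sketch of the intended behavior:
import pandas.computation.expr as expr

s = 1
assert expr.isexpr('s + 1')                       # parses and names resolve
assert not expr.isexpr('s +', check_names=False)  # syntax error regardless
assert not expr.isexpr('undefined_name + 1')      # NameError when checking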
self.func(left.value, right) + elif not (is_term(left) or is_term(right)): res = self.func(left, right) - elif isinstance(left, basestring) and not isinstance(right, - basestring): - res = self.func(_resolve_name(env, left), right) - elif not isinstance(left, basestring) and isinstance(right, - basestring): - res = self.func(left, _resolve_name(env, right)) - elif isinstance(left, basestring) and isinstance(right, basestring): - res = self.func(_resolve_name(env, left), _resolve_name(env, - right)) return res class Mod(BinOp): - def __init__(self, lhs, rhs, env=None): + def __init__(self, lhs, rhs): super(Mod, self).__init__('%', lhs, rhs) - _cast((lhs, rhs), env, np.float_) + _cast(self.operands, np.float_) _unary_ops_syms = '+', '-', '~' @@ -218,10 +228,7 @@ def __call__(self, env): except TypeError: operand = self.operand - if isinstance(operand, basestring): - v = _resolve_name(env, operand) - else: - v = operand + v = operand.value if is_term(operand) else operand try: res = self.func(v) @@ -231,5 +238,6 @@ def __call__(self, env): return res def __repr__(self): - return '{0}(op={1!r}, operand={2!r})'.format(self.name, self.op, - self.operand) + return com.pprint_thing('{0}(op={1!r}, ' + 'operand={2!r})'.format(self.name, self.op, + self.operand)) From 54f1897da4c119dec68b5f215981cd12787d5c77 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 19 Jun 2013 23:56:00 -0400 Subject: [PATCH 10/48] ENH: allow an already-parsed expression to be passed to eval --- pandas/computation/engines.py | 2 +- pandas/computation/eval.py | 22 ++++++++++++++-------- pandas/computation/expr.py | 5 +++-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 4ebb4a15fdee4..342bde7b2beeb 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -48,7 +48,7 @@ def _maybe_promote_shape(values, naxes): axes_slice = [slice(None)] * naxes - # symmetric difference + # symmetric difference of numaxes and ndims slices = nax - ndim if ndims == naxes: diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 298554005d6ed..828ee334d71f9 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -22,14 +22,20 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, frame = sys._getframe(1) try: - # get the globals and locals - gbl, lcl = global_dict or frame.f_globals, local_dict or frame.f_locals - - # shallow copy the scope so we don't overwrite everything - env = Scope(gbl.copy(), lcl.copy()) - - # parse the expression - parsed_expr = Expr(expr, engine, env, truediv) + # parse the expression from a string + if isinstance(expr, basestring): + # get the globals and locals + gbl, lcl = (global_dict or frame.f_globals, + local_dict or frame.f_locals) + + # shallow copy the scope so we don't overwrite everything + env = Scope(gbl.copy(), lcl.copy()) + parsed_expr = Expr(expr, engine, env, truediv) + elif isinstance(expr, Expr): + parsed_expr = expr + else: + raise TypeError("eval only accepts strings and Expr objects, you " + "passed a {0!r}".format(expr.__class__.__name__)) # choose the engine eng = _engines[engine] diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index f0ed6b5de9ed6..63779da24394f 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -115,8 +115,9 @@ def __init__(self, expr, engine='numexpr', env=None, truediv=True): self.truediv = truediv def _get_calling_scope(self): - frame = sys._getframe(1) - gbl, lcl = 
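# With this change eval also accepts an already-parsed Expr, so parsing
# can be hoisted out of hot loops (illustrative sketch; relies on the
# environment handling finished a couple of patches later in the series):
import pandas as pd
from pandas.computation.expr import Expr

x = 10
parsed = Expr('x + 1')     # parsed once, scope captured at construction
for _ in range(3):
    res = pd.eval(parsed)  # reused without re-parsing
assert res == 11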
frame.f_globals, frame.f_locals + # call this method **only** in the constructor + frame = sys._getframe(2) + gbl, lcl = frame.f_globals.copy(), frame.f_locals.copy() try: return Scope(gbl, lcl) From e20900ac61f7c22f5b78e6388f519a01eb53c12e Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 25 Jun 2013 20:30:09 -0400 Subject: [PATCH 11/48] CLN: add automatic scope creating object --- pandas/computation/expr.py | 31 +++++++++++++-------------- pandas/computation/tests/test_eval.py | 27 ++++++++++++++++++----- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 63779da24394f..987f694bf0904 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -1,7 +1,6 @@ import ast import sys from functools import partial -import collections from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops, Mod @@ -10,7 +9,17 @@ from pandas.computation.ops import _resolve_name, Term, Constant -Scope = collections.namedtuple('Scope', 'globals locals') +class Scope(object): + __slots__ = 'globals', 'locals' + + def __init__(self, gbls=None, lcls=None, frame_level=1): + frame = sys._getframe(frame_level) + + try: + self.globals = gbls or frame.f_globals.copy() + self.locals = lcls or frame.f_locals.copy() + finally: + del frame class ExprParserError(Exception): @@ -104,26 +113,15 @@ def visit_Mod(self, node): class Expr(object): - """Expr object for pandas - """ + """Expr object""" def __init__(self, expr, engine='numexpr', env=None, truediv=True): self.expr = expr - self.env = env or self._get_calling_scope() + self.env = env or Scope(frame_level=2) self._visitor = ExprVisitor(self.env) self.terms = self.parse() self.engine = engine self.truediv = truediv - def _get_calling_scope(self): - # call this method **only** in the constructor - frame = sys._getframe(2) - gbl, lcl = frame.f_globals.copy(), frame.f_locals.copy() - - try: - return Scope(gbl, lcl) - finally: - del frame - def __call__(self, env): env.locals['truediv'] = self.truediv return self.terms(env) @@ -154,4 +152,5 @@ def isexpr(s, check_names=True): return False except NameError: return not check_names - return True + else: + return True diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 417fb106f90fa..18fe641db5ed2 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -19,11 +19,12 @@ from pandas.util.testing import makeCustomDataframe as mkdf from pandas.computation.engines import (_engines, _align_core, _reconstruct_object) -from pandas.computation.ops import _binary_ops_dict, _unary_ops_dict +from pandas.computation.ops import _binary_ops_dict, _unary_ops_dict, Term import pandas.computation.expr as expr from pandas.computation.expressions import _USE_NUMEXPR from pandas.computation.eval import Scope from pandas.util.testing import assert_frame_equal, randbool +from pandas.util.py3compat import PY3 def skip_numexpr_engine(engine): @@ -48,7 +49,9 @@ def _eval_from_expr(lhs, cmp1, rhs, binop, cmp2): f1 = _binary_ops_dict[cmp1] f2 = _binary_ops_dict[cmp2] bf = _binary_ops_dict[binop] - typ, (lhs, rhs), axes = _align_core((lhs, rhs)) + env = Scope() + typ, axes = _align_core((Term(lhs, 'lhs', env), Term(rhs, 'rhs', env))) + lhs, rhs = env.locals['lhs'], env.locals['rhs'] return _reconstruct_object(typ, bf(f1(lhs, rhs), f2(lhs, rhs)), axes) @@ -483,7 +486,7 @@ def check_series_frame_commutativity(engine, r_idx_type, c_idx_type, op, df) -INDEX_TYPES = 
'i', 'f', 's', 'u', 'dt', # 'p' +INDEX_TYPES = 'i', 'f', 's', 'u', # 'dt', # 'p' @slow @@ -562,7 +565,21 @@ def test_truediv(): def check_truediv(engine): - s = randn(10) + s = np.array([1]) + ex = 's / 1' + + if PY3: + res = pd.eval(ex, truediv=False) + assert_array_equal(res, np.array([1.0])) + + res = pd.eval(ex, truediv=True) + assert_array_equal(res, np.array([1.0])) + else: + res = pd.eval(ex, truediv=False) + assert_array_equal(res, np.array([1])) + + res = pd.eval(ex, truediv=True) + assert_array_equal(res, np.array([1.0])) __var_s = randn(10) @@ -583,7 +600,7 @@ def check_is_expr(engine): valid = 's + 1' invalid = 's +' assert_true(expr.isexpr(valid, check_names=True)) - assert_false(expr.isexpr(valid, check_names=False)) + assert_true(expr.isexpr(valid, check_names=False)) assert_false(expr.isexpr(invalid, check_names=False)) assert_false(expr.isexpr(invalid, check_names=True)) From 51d80f6ca4febdcef4c11c65fa77b6861dae10bf Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 28 Jun 2013 16:15:37 -0400 Subject: [PATCH 12/48] CLN: make the environment an implementation detail --- pandas/computation/engines.py | 11 ++++--- pandas/computation/eval.py | 61 +++++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 342bde7b2beeb..39155ad112847 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -235,11 +235,12 @@ def convert(self): """Convert an expression for evaluation.""" pass - def evaluate(self, env): + def evaluate(self): if not self._is_aligned: - self.result_type, self.aligned_axes = _align(self.expr.terms, env) + self.result_type, self.aligned_axes = _align(self.expr.terms, + self.expr.env) - res = self._evaluate(env) + res = self._evaluate(self.expr.env) return _reconstruct_object(self.result_type, res, self.aligned_axes) @property @@ -284,8 +285,8 @@ def __init__(self, expr): def convert(self): pass - def evaluate(self, env): - return self.expr(env) + def evaluate(self): + return self.expr(self.expr.env) def _evaluate(self, env): pass diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 828ee334d71f9..7788eddf96f87 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -11,39 +11,50 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, global_dict=None): + """Evaluate a Python expression as a string. 
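# The truediv flag, sketched against the test above: on Python 2 integer
# division is preserved unless truediv=True; on Python 3 division is
# always true division (illustrative):
import numpy as np
import pandas as pd

s = np.array([1])
res = pd.eval('s / 1', truediv=False)  # array([1]) on Python 2
res = pd.eval('s / 1', truediv=True)   # array([1.0]) everywhere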
+ + Parameters + ---------- + expr : string or Expr object + engine : string, optional, default 'numexpr' + The engine to use to evaluate the passed expression + truediv : bool, optional, default True + local_dict : dict or None, optional, default None + global_dict : dict or None, optional, default None + + Returns + ------- + obj : ndarray, scalar, DataFrame, Series, or Panel + """ # make sure we're passed a valid engine if not engine in _engines: raise KeyError('Invalid engine {0} passed, valid engines are' ' {1}'.format(_engines.keys())) - # 1 up in the call stack for locals/globals; see the documentation for the - # inspect module for why you must decrease the refcount of frame at all - # costs - frame = sys._getframe(1) + eng = _engines[engine] + + if isinstance(expr, basestring): + frame = sys._getframe(1) - try: - # parse the expression from a string - if isinstance(expr, basestring): - # get the globals and locals - gbl, lcl = (global_dict or frame.f_globals, - local_dict or frame.f_locals) + # get the globals and locals + gbl, lcl = (global_dict or frame.f_globals, + local_dict or frame.f_locals) - # shallow copy the scope so we don't overwrite everything + try: + # shallow copy the scope so we don't overwrite anything env = Scope(gbl.copy(), lcl.copy()) - parsed_expr = Expr(expr, engine, env, truediv) - elif isinstance(expr, Expr): - parsed_expr = expr - else: - raise TypeError("eval only accepts strings and Expr objects, you " - "passed a {0!r}".format(expr.__class__.__name__)) - - # choose the engine - eng = _engines[engine] - - # construct the engine and evaluate - ret = eng(parsed_expr).evaluate(env) - finally: - del frame + finally: + del frame + parsed_expr = Expr(expr, engine, env, truediv) + elif isinstance(expr, Expr): + parsed_expr = expr + else: + raise TypeError("eval only accepts strings and Expr objects, you " + "passed a {0!r}".format(expr.__class__.__name__)) + + + # construct the engine and evaluate + ret = eng(parsed_expr).evaluate() # sanity check for a number if np.isscalar(ret): From 038d79c25cf2c8968a176e37e8b6f2d14e44414a Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 28 Jun 2013 16:26:58 -0400 Subject: [PATCH 13/48] DOC: add docstring to eval --- pandas/computation/eval.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 7788eddf96f87..38248c26f88e3 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -11,20 +11,44 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, global_dict=None): - """Evaluate a Python expression as a string. + """Evaluate a Python expression as a string using various backends. + + The following arithmetic operations are supported: +, -, *, /, **, %, // + (python engine only) along with the following boolean operations: | (or), & + (and), and ~ (not). All Pandas objects are supported and behave as they + would with in-Python evaluation. Parameters ---------- expr : string or Expr object - engine : string, optional, default 'numexpr' - The engine to use to evaluate the passed expression + The expression to evaluate. This can be either a string or an ``Expr`` + object. + engine : string, optional, default 'numexpr', {'python', 'numexpr', 'pytables'} + The engine used to evaluate the expression. Supported engines are + + - 'numexpr': This default engine evaluates pandas objects using numexpr + for large speed ups in complex expressions with large + frames. 
+ - 'python': Performs operations as if you had eval'd in top level + python + - 'pytables': Engine used for evaluating expressions for selection of + objects from PyTables HDF5 tables. + truediv : bool, optional, default True + Whether to use true division, like in Python >= 3 local_dict : dict or None, optional, default None + A dictionary of local variables, taken from locals() by default. global_dict : dict or None, optional, default None + A dictionary of global variables, taken from globals() by default. Returns ------- obj : ndarray, scalar, DataFrame, Series, or Panel + + Notes + ----- + The benefits of using ``eval`` are that very large frames that are terms in + long expressions are sped up, sometimes by as much as 10x. """ # make sure we're passed a valid engine if not engine in _engines: From 599cf32bdaaaf65e26478a6a9ae2e669f6ab7014 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 28 Jun 2013 17:09:42 -0400 Subject: [PATCH 14/48] CLN: cleanup pytables.py a bit --- pandas/io/pytables.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fdb86c43b7160..a53907c518aab 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -219,7 +219,7 @@ def read_hdf(path_or_buf, key, **kwargs): # a passed store; user controls open/close f(path_or_buf, False) -class HDFStore(StringMixin): +class HDFStore(object): """ dict-like IO interface for storing pandas objects in PyTables format. @@ -421,7 +421,8 @@ def get(self, key): raise KeyError('No object named %s in the file' % key) return self._read_group(group) - def select(self, key, where=None, start=None, stop=None, columns=None, iterator=False, chunksize=None, auto_close=False, **kwargs): + def select(self, key, where=None, start=None, stop=None, columns=None, + iterator=False, chunksize=None, auto_close=False, **kwargs): """ Retrieve pandas object stored in file, optionally based on where criteria @@ -448,14 +449,18 @@ def select(self, key, where=None, start=None, stop=None, columns=None, iterator= # what we are actually going to do for a chunk def func(_start, _stop): - return s.read(where=where, start=_start, stop=_stop, columns=columns, **kwargs) + return s.read(where=where, start=_start, stop=_stop, + columns=columns, **kwargs) if iterator or chunksize is not None: if not s.is_table: raise TypeError("can only use an iterator or chunksize on a table") - return TableIterator(self, func, nrows=s.nrows, start=start, stop=stop, chunksize=chunksize, auto_close=auto_close) + return TableIterator(self, func, nrows=s.nrows, start=start, + stop=stop, chunksize=chunksize, + auto_close=auto_close) - return TableIterator(self, func, nrows=s.nrows, start=start, stop=stop, auto_close=auto_close).get_values() + return TableIterator(self, func, nrows=s.nrows, start=start, stop=stop, + auto_close=auto_close).get_values() def select_as_coordinates(self, key, where=None, start=None, stop=None, **kwargs): """ @@ -1620,6 +1625,9 @@ def __unicode__(self): return "%-12.12s (shape->%s)" % (self.pandas_type,s) return self.pandas_type + def __str__(self): + return self.__repr__() + def set_object_info(self): """ set my pandas type & version """ self.attrs.pandas_type = self.pandas_kind From ea769e664d32e413bd44fedd5849ab077e8812f3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 29 Jun 2013 11:08:00 -0400 Subject: [PATCH 15/48] CLN: clean up engines --- pandas/computation/engines.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff 
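# A sketch of the select() signature tidied in the previous patch; the
# store key and column names here are hypothetical:
from pandas.io.pytables import HDFStore

store = HDFStore('example.h5', mode='r')
df = store.select('df', columns=['A', 'B'], start=0, stop=1000)
store.close()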
--git a/pandas/computation/engines.py b/pandas/computation/engines.py index 39155ad112847..64582192a9874 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -108,17 +108,17 @@ def _align_core(terms): axes = biggest.axes naxes = len(axes) - for i in term_index: - for axis, items in enumerate(terms[i].value.axes): - if com.is_series(terms[i].value) and naxes > 1: - axes[naxes - 1] = axes[naxes - 1].join(terms[i].value.index, - how='outer') + for term in (terms[i] for i in term_index): + for axis, items in enumerate(term.value.axes): + if com.is_series(term.value) and naxes > 1: + ax, itm = naxes - 1, term.value.index else: - axes[axis] = axes[axis].join(items, how='outer') + ax, itm = axis, items + axes[ax] = axes[ax].join(itm, how='outer') for i, ndim in ndims.iteritems(): for axis, items in izip(xrange(ndim), axes): - ti = terms[i].value # needed here because we modify it in the inner loop + ti = terms[i].value if hasattr(ti, 'reindex_axis'): transpose = com.is_series(ti) and naxes > 1 From ff78c08139f2c5f7e632827f313ccbf88aba9100 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 3 Jul 2013 21:21:49 -0400 Subject: [PATCH 16/48] CLN: clean up eval and have the Scope instance auto create the scope if none exists --- pandas/computation/eval.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 38248c26f88e3..591993bc4f228 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -58,17 +58,9 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, eng = _engines[engine] if isinstance(expr, basestring): - frame = sys._getframe(1) - - # get the globals and locals - gbl, lcl = (global_dict or frame.f_globals, - local_dict or frame.f_locals) - - try: - # shallow copy the scope so we don't overwrite anything - env = Scope(gbl.copy(), lcl.copy()) - finally: - del frame + # need to go 2 up in the call stack from the constructor since we want + # the calling scope's variables + env = Scope(global_dict, local_dict, frame_level=2) parsed_expr = Expr(expr, engine, env, truediv) elif isinstance(expr, Expr): parsed_expr = expr @@ -80,7 +72,7 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, # construct the engine and evaluate ret = eng(parsed_expr).evaluate() - # sanity check for a number + # sanity check for a number TODO: eventually take out if np.isscalar(ret): if not isinstance(ret, (np.number, numbers.Number, np.bool_, bool)): raise TypeError('scalar result must be numeric or bool, type is ' From f9f7fd7b6f841eae34ac1795f02320646b15708c Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 3 Jul 2013 21:29:19 -0400 Subject: [PATCH 17/48] CLN: add six.string_types checking instead of basestring --- pandas/computation/eval.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index 591993bc4f228..b7d15d1d009bc 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -5,6 +5,8 @@ import numpy as np +import six + from pandas.computation.expr import Expr, Scope from pandas.computation.engines import _engines @@ -57,7 +59,7 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, eng = _engines[engine] - if isinstance(expr, basestring): + if isinstance(expr, six.string_types): # need to go 2 up in the call stack from the constructor since we want # the calling scope's variables env = Scope(global_dict, local_dict, 
frame_level=2) @@ -72,9 +74,11 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, # construct the engine and evaluate ret = eng(parsed_expr).evaluate() - # sanity check for a number TODO: eventually take out + # sanity check for a number + # TODO: eventually take out + # TODO: pytables engine will probably need a string check if np.isscalar(ret): - if not isinstance(ret, (np.number, numbers.Number, np.bool_, bool)): - raise TypeError('scalar result must be numeric or bool, type is ' - '{0!r}'.format(ret.__class__.__name__)) + if not isinstance(ret, (np.number, np.bool_, numbers.Number)): + raise TypeError('scalar result must be numeric or bool, passed ' + 'type is {0!r}'.format(ret.__class__.__name__)) return ret From 48eff13c0418c146dbca43f4893d027d0624ffe2 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 3 Jul 2013 22:13:08 -0400 Subject: [PATCH 18/48] TST: clean up some tests, add minor assertions where none existed --- pandas/io/tests/test_pytables.py | 33 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 00d8089ad2ee7..6737408081f3d 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1,8 +1,9 @@ import nose import unittest -import os import sys +import os import warnings +from contextlib import contextmanager import datetime import numpy as np @@ -19,7 +20,6 @@ from pandas import concat, Timestamp from pandas.util import py3compat -from numpy.testing.decorators import slow try: import tables @@ -36,12 +36,12 @@ # contextmanager to ensure the file cleanup def safe_remove(path): if path is not None: - import os try: os.remove(path) except: pass + def safe_close(store): try: if store is not None: @@ -49,7 +49,6 @@ def safe_close(store): except: pass -from contextlib import contextmanager @contextmanager def ensure_clean(path, mode='a', complevel=None, complib=None, @@ -620,7 +619,6 @@ def test_append_with_different_block_ordering(self): store.append('df',df) - def test_ndim_indexables(self): """ test using ndim tables in new ways""" @@ -1011,6 +1009,7 @@ def test_big_table_frame(self): store.append('df', df) rows = store.root.df.table.nrows recons = store.select('df') + assert isinstance(recons, DataFrame) print ("\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x)) @@ -1064,7 +1063,7 @@ def test_big_put_frame(self): with ensure_clean(self.path, mode='w') as store: start_time = time.time() - store = HDFStore(fn, mode='w') + store = HDFStore(self.path, mode='w') store.put('df', df) print (df.get_dtype_counts()) @@ -1092,6 +1091,7 @@ def test_big_table_panel(self): store.append('wp', wp) rows = store.root.wp.table.nrows recons = store.select('wp') + assert isinstance(recons, Panel) print ("\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x)) @@ -1254,7 +1254,6 @@ def test_table_values_dtypes_roundtrip(self): expected.sort() tm.assert_series_equal(result,expected) - def test_table_mixed_dtypes(self): # frame @@ -2352,7 +2351,6 @@ def test_string_select(self): expected = df[df.int!=2] assert_frame_equal(result,expected) - def test_read_column(self): df = tm.makeTimeDataFrame() @@ -2580,7 +2578,6 @@ def _check_double_roundtrip(self, obj, comparator, compression=False, again = store['obj'] comparator(again, obj, **kwargs) - def _check_roundtrip_table(self, obj, comparator, compression=False): options = {} if compression: @@ -2597,6 +2594,7 @@ def test_pytables_native_read(self): try: store = 
HDFStore(tm.get_data_path('legacy_hdf/pytables_native.h5'), 'r') d2 = store['detector/readout'] + assert isinstance(d2, DataFrame) finally: safe_close(store) @@ -2604,6 +2602,7 @@ def test_pytables_native_read(self): store = HDFStore(tm.get_data_path('legacy_hdf/pytables_native2.h5'), 'r') str(store) d1 = store['detector'] + assert isinstance(d1, DataFrame) finally: safe_close(store) @@ -2653,11 +2652,18 @@ def test_legacy_0_10_read(self): def test_legacy_0_11_read(self): # legacy from 0.11 try: - store = HDFStore(tm.get_data_path('legacy_hdf/legacy_table_0.11.h5'), 'r') + path = os.path.join('legacy_hdf', 'legacy_table_0.11.h5') + store = HDFStore(tm.get_data_path(path), 'r') str(store) + assert 'df' in store + assert 'df1' in store + assert 'mi' in store df = store.select('df') df1 = store.select('df1') mi = store.select('mi') + assert isinstance(df, DataFrame) + assert isinstance(df1, DataFrame) + assert isinstance(mi, DataFrame) finally: safe_close(store) @@ -2665,10 +2671,9 @@ def test_copy(self): def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): try: - import os - if f is None: - f = tm.get_data_path('legacy_hdf/legacy_0.10.h5') + f = tm.get_data_path(os.path.join('legacy_hdf', + 'legacy_0.10.h5')) store = HDFStore(f, 'r') @@ -2738,6 +2743,7 @@ def test_legacy_table_write(self): df = DataFrame(dict(A = 'foo', B = 'bar'),index=range(10)) store.append('df', df, data_columns = ['B'], min_itemsize={'A' : 200 }) + store.append('wp', wp) store.close() @@ -2824,6 +2830,7 @@ def _test_sort(obj): else: raise ValueError('type not supported here') + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From d87f0271669824091ec3822956011bc0e2b55900 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 08:24:45 -0400 Subject: [PATCH 19/48] CLN: clean up frame.py a bit --- pandas/core/frame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a8bb74f86a43e..7f0a8492a4403 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5681,6 +5681,7 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): return create_block_manager_from_arrays(arrays, arr_names, axes) + def extract_index(data): from pandas.core.index import _union_indexes @@ -5941,6 +5942,7 @@ def _homogenize(data, index, dtype=None): return homogenized + def _from_nested_dict(data): # TODO: this should be seriously cythonized new_data = OrderedDict() From 5b58a08d77141ef7ef1faab4d33089bd8f71c64c Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 08:25:17 -0400 Subject: [PATCH 20/48] CLN: clean up pytables arguments a bit --- pandas/core/base.py | 1 + pandas/io/pytables.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 6122e78fa8bce..2caaf00723824 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -39,6 +39,7 @@ def __repr__(self): """ return str(self) + class PandasObject(StringMixin): """baseclass for various pandas objects""" diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a53907c518aab..4a538b22bf939 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -182,7 +182,8 @@ def get_store(path, mode='a', complevel=None, complib=None, ### interface to/from ### -def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, append=None, **kwargs): +def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, + 
append=None, **kwargs): """ store this object, close it if we opened it """ if append: f = lambda store: store.append(key, value, **kwargs) @@ -190,7 +191,8 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, app f = lambda store: store.put(key, value, **kwargs) if isinstance(path_or_buf, basestring): - with get_store(path_or_buf, mode=mode, complevel=complevel, complib=complib) as store: + with get_store(path_or_buf, mode=mode, complevel=complevel, + complib=complib) as store: f(store) else: f(path_or_buf) From 7482a277a8c0309faec6481d0a4885670deb7369 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 12:45:54 -0400 Subject: [PATCH 21/48] CLN: use shiny new string mixin to refactor repring --- pandas/computation/ops.py | 60 ++++++++++++++------------- pandas/computation/tests/test_eval.py | 2 +- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 8c66fd0d122d5..26774c17959fb 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -1,13 +1,15 @@ import operator as op -from functools import partial import numpy as np from pandas.util.py3compat import PY3 import pandas.core.common as com +from pandas.core.base import StringMixin _reductions = 'sum', 'prod' -_mathops = 'sin', 'cos', 'tan' +_mathops = ('sin', 'cos', 'exp', 'log', 'expm1', 'log1p', 'pow', 'div', 'sqrt', + 'inv', 'sinh', 'cosh', 'tanh', 'arcsin', 'arccos', 'arctan', + 'arccosh', 'arcsinh', 'arctanh', 'arctan2', 'abs') class OperatorError(Exception): @@ -47,23 +49,21 @@ def _update_name(env, key, value): raise NameError('{0!r} is undefined'.format(key)) -def _update_names(env, mapping): - updater = partial(_update_name, env) - for key, value in mapping.iteritems(): - updater(key, value) +class NamedObjectMixin(object): + @property + def typename(self): + return com.pprint_thing(self.__class__.__name__) -class Term(object): - def __init__(self, value, name, env): - self.value = value +class Term(StringMixin, NamedObjectMixin): + def __init__(self, name, env): self.name = name + self.value = _resolve_name(env, name) self.env = env - self.type = type(value) - - def __str__(self): - return '{0}({1!r})'.format(self.__class__.__name__, self.name) + self.type = type(self.value) - __repr__ = __str__ + def __unicode__(self): + return com.pprint_thing('{0}({1!r})'.format(self.typename, self.name)) def update(self, value): _update_name(self.env, self.name, value) @@ -76,10 +76,10 @@ def isscalar(self): class Constant(Term): def __init__(self, value, env): - super(Constant, self).__init__(value, value, env) + super(Constant, self).__init__(value, env) -class Op(object): +class Op(NamedObjectMixin, StringMixin): """Hold an operator of unknown arity """ def __init__(self, op, operands): @@ -89,9 +89,13 @@ def __init__(self, op, operands): def __iter__(self): return iter(self.operands) - @property - def name(self): - return self.__class__.__name__ + def __unicode__(self): + op = 'op={0!r}'.format(self.op) + operands = ', '.join('opr_{i}={opr}'.format(i=i, opr=opr) + for i, opr in enumerate(self.operands)) + return com.pprint_thing('{0}({op}, ' + '{operands})'.format(self.typename, op=op, + operands=operands)) _cmp_ops_syms = '>', '<', '>=', '<=', '==', '!=' @@ -113,14 +117,14 @@ def name(self): _binary_ops_dict.update(d) -def _cast(terms, dtype): +def _cast_inplace(terms, dtype): dt = np.dtype(dtype) for term in terms: # cast all the way down the tree since operands must be try: - _cast(term.operands, dtype) + 
_cast_inplace(term.operands, dtype) except AttributeError: - # we've bottomed out so cast + # we've bottomed out so actually do the cast try: new_value = term.value.astype(dt) except AttributeError: @@ -157,13 +161,11 @@ def __init__(self, op, lhs, rhs): raise BinaryOperatorError('Invalid binary operator {0}, valid' ' operators are {1}'.format(op, keys)) - def __repr__(self): + def __unicode__(self): return com.pprint_thing('{0}(op={1!r}, lhs={2!r}, ' - 'rhs={3!r})'.format(self.name, self.op, + 'rhs={3!r})'.format(self.typename, self.op, self.lhs, self.rhs)) - __str__ = __repr__ - def __call__(self, env): # handle truediv if self.op == '/' and env.locals['truediv']: @@ -197,7 +199,7 @@ def __call__(self, env): class Mod(BinOp): def __init__(self, lhs, rhs): super(Mod, self).__init__('%', lhs, rhs) - _cast(self.operands, np.float_) + _cast_inplace(self.operands, np.float_) _unary_ops_syms = '+', '-', '~' @@ -237,7 +239,7 @@ def __call__(self, env): return res - def __repr__(self): + def __unicode__(self): return com.pprint_thing('{0}(op={1!r}, ' - 'operand={2!r})'.format(self.name, self.op, + 'operand={2!r})'.format(self.typename, self.op, self.operand)) diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 18fe641db5ed2..15509e2e489df 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -50,7 +50,7 @@ def _eval_from_expr(lhs, cmp1, rhs, binop, cmp2): f2 = _binary_ops_dict[cmp2] bf = _binary_ops_dict[binop] env = Scope() - typ, axes = _align_core((Term(lhs, 'lhs', env), Term(rhs, 'rhs', env))) + typ, axes = _align_core((Term('lhs', env), Term('rhs', env))) lhs, rhs = env.locals['lhs'], env.locals['rhs'] return _reconstruct_object(typ, bf(f1(lhs, rhs), f2(lhs, rhs)), axes) From 0d40fe182af58cceda1d4fcc99a6556ac83293d3 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:02:16 -0400 Subject: [PATCH 22/48] CLN: move align to its own file --- pandas/computation/align.py | 219 ++++++++++++++++++++++++++++++++++ pandas/computation/engines.py | 218 +-------------------------------- 2 files changed, 220 insertions(+), 217 deletions(-) create mode 100644 pandas/computation/align.py diff --git a/pandas/computation/align.py b/pandas/computation/align.py new file mode 100644 index 0000000000000..f2bf11d41e185 --- /dev/null +++ b/pandas/computation/align.py @@ -0,0 +1,219 @@ +from functools import partial, wraps +from itertools import izip + +import numpy as np + +import pandas as pd +import pandas.core.common as com +from pandas.computation.ops import is_const +from pandas.computation.common import flatten + + +def _align_core_single_unary_op(term): + if isinstance(term.value, np.ndarray) and not com.is_series(term.value): + typ = partial(np.asanyarray, dtype=term.value.dtype) + else: + typ = type(term.value) + ret = typ, + + if not hasattr(term.value, 'axes'): + ret += None, + else: + ret += _zip_axes_from_type(typ, term.value.axes), + return ret + + +def _zip_axes_from_type(typ, new_axes): + axes = {} + for ax_ind, ax_name in typ._AXIS_NAMES.iteritems(): + axes[ax_name] = new_axes[ax_ind] + return axes + + +def _maybe_promote_shape(values, naxes): + # test to see if we have an array else leave since must be a number + if not isinstance(values, np.ndarray): + return values + + ndims = values.ndim + if ndims > naxes: + raise AssertionError('cannot have more dims than axes, ' + '{0} > {1}'.format(ndims, naxes)) + if ndims == naxes: + return values + + ndim = set(xrange(ndims)) + nax = 
set(xrange(naxes)) + + axes_slice = [slice(None)] * naxes + + # symmetric difference of numaxes and ndims + slices = nax - ndim + + if ndims == naxes: + if slices: + raise AssertionError('slices should be empty if ndims == naxes ' + '{0}'.format(slices)) + else: + if not slices: + raise AssertionError('slices should NOT be empty if ndim != naxes ' + '{0}'.format(slices)) + + for sl in slices: + axes_slice[sl] = np.newaxis + + return values[tuple(axes_slice)] + + +def _any_pandas_objects(terms): + """Check a sequence of terms for instances of PandasObject.""" + return any(com.is_pd_obj(term.value) for term in terms) + + +def _filter_special_cases(f): + @wraps(f) + def wrapper(terms): + # single unary operand + if len(terms) == 1: + return _align_core_single_unary_op(terms[0]) + + # only scalars + elif all(term.isscalar for term in terms): + return np.result_type(*(term.value for term in terms)), None + + # single element ndarrays + all_has_size = all(hasattr(term.value, 'size') for term in terms) + if (all_has_size and all(term.value.size == 1 for term in terms)): + return np.result_type(*(term.value for term in terms)), None + + # no pandas so just punt to the evaluator + if not _any_pandas_objects(terms): + return np.result_type(*(term.value for term in terms)), None + + return f(terms) + return wrapper + + +@_filter_special_cases +def _align_core(terms): + term_index = [i for i, term in enumerate(terms) if hasattr(term.value, + 'axes')] + term_dims = [terms[i].value.ndim for i in term_index] + ndims = pd.Series(dict(zip(term_index, term_dims))) + + # initial axes are the axes of the largest-axis'd term + biggest = terms[ndims.idxmax()].value + typ = biggest._constructor + axes = biggest.axes + naxes = len(axes) + + for term in (terms[i] for i in term_index): + for axis, items in enumerate(term.value.axes): + if com.is_series(term.value) and naxes > 1: + ax, itm = naxes - 1, term.value.index + else: + ax, itm = axis, items + axes[ax] = axes[ax].join(itm, how='outer') + + for i, ndim in ndims.iteritems(): + for axis, items in izip(xrange(ndim), axes): + ti = terms[i].value + + if hasattr(ti, 'reindex_axis'): + transpose = com.is_series(ti) and naxes > 1 + + if transpose: + f = partial(ti.reindex, index=axes[naxes - 1], copy=False) + else: + f = partial(ti.reindex_axis, items, axis=axis, copy=False) + + if pd.lib.is_bool_array(ti.values): + r = f(fill_value=True) + else: + r = f() + + terms[i].update(r) + + res = _maybe_promote_shape(terms[i].value.T if transpose else + terms[i].value, naxes) + res = res.T if transpose else res + + try: + v = res.values + except AttributeError: + v = res + terms[i].update(v) + + return typ, _zip_axes_from_type(typ, axes) + + +def _filter_terms(flat): + # numeric literals + literals = set(filter(is_const, flat)) + + # these are strings which are variable names + names = set(flat) - literals + + # literals are not names and names are not literals, so intersection should + # be empty + if literals & names: + raise ValueError('literals cannot be names and names cannot be ' + 'literals') + return names, literals + + +def _align(terms, env): + # flatten the parse tree (a nested list) + terms = list(flatten(terms)) + + # separate names and literals + names, literals = _filter_terms(terms) + + if not names: # only literals so just promote to a common type + return np.result_type(*literals).type, None + + # if all resolved variables are numeric scalars + if all(term.isscalar for term in terms): + return np.result_type(*(term.value for term in terms)).type, None + + 
# perform the main alignment + typ, axes = _align_core(terms) + return typ, axes + + +def _reconstruct_object(typ, obj, axes): + """Reconstruct an object given its type, raw value, and possibly empty + (None) axes. + + Parameters + ---------- + typ : object + A type + obj : object + The value to use in the type constructor + axes : dict + The axes to use to construct the resulting pandas object + + Returns + ------- + reconst : typ + An object of type ``typ`` with the value `obj` and possible axes + `axes`. + """ + try: + # handle numpy dtypes + typ = typ.type + except AttributeError: + pass + + if (not isinstance(typ, partial) and + issubclass(typ, pd.core.generic.PandasObject)): + return typ(obj, **axes) + + ret_value = typ(obj) + + try: + return ret_value.item() + except (AttributeError, ValueError): + return ret_value + diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 64582192a9874..db6beb87da3a5 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -1,222 +1,6 @@ import abc -from functools import partial, wraps -from itertools import izip -import numpy as np - -import pandas as pd -import pandas.core.common as com -from pandas.computation.ops import is_const -from pandas.computation.common import flatten - - -def _align_core_single_unary_op(term): - if isinstance(term.value, np.ndarray) and not com.is_series(term.value): - typ = partial(np.asanyarray, dtype=term.value.dtype) - else: - typ = type(term.value) - ret = typ, - - if not hasattr(term.value, 'axes'): - ret += None, - else: - ret += _zip_axes_from_type(typ, term.value.axes), - return ret - - -def _zip_axes_from_type(typ, new_axes): - axes = {} - for ax_ind, ax_name in typ._AXIS_NAMES.iteritems(): - axes[ax_name] = new_axes[ax_ind] - return axes - - -def _maybe_promote_shape(values, naxes): - # test to see if we have an array else leave since must be a number - if not isinstance(values, np.ndarray): - return values - - ndims = values.ndim - if ndims > naxes: - raise AssertionError('cannot have more dims than axes, ' - '{0} > {1}'.format(ndims, naxes)) - if ndims == naxes: - return values - - ndim = set(xrange(ndims)) - nax = set(xrange(naxes)) - - axes_slice = [slice(None)] * naxes - - # symmetric difference of numaxes and ndims - slices = nax - ndim - - if ndims == naxes: - if slices: - raise AssertionError('slices should be empty if ndims == naxes ' - '{0}'.format(slices)) - else: - if not slices: - raise AssertionError('slices should NOT be empty if ndim != naxes ' - '{0}'.format(slices)) - - for sl in slices: - axes_slice[sl] = np.newaxis - - return values[tuple(axes_slice)] - - -def _any_pandas_objects(terms): - """Check a sequence of terms for instances of PandasObject.""" - return any(com.is_pd_obj(term.value) for term in terms) - - -def _filter_special_cases(f): - @wraps(f) - def wrapper(terms): - # single unary operand - if len(terms) == 1: - return _align_core_single_unary_op(terms[0]) - - # only scalars - elif all(term.isscalar for term in terms): - return np.result_type(*(term.value for term in terms)), None - - # single element ndarrays - all_has_size = all(hasattr(term.value, 'size') for term in terms) - if (all_has_size and all(term.value.size == 1 for term in terms)): - return np.result_type(*(term.value for term in terms)), None - - # no pandas so just punt to the evaluator - if not _any_pandas_objects(terms): - return np.result_type(*(term.value for term in terms)), None - - return f(terms) - return wrapper - - -@_filter_special_cases -def 
_align_core(terms): - term_index = [i for i, term in enumerate(terms) if hasattr(term.value, - 'axes')] - term_dims = [terms[i].value.ndim for i in term_index] - ndims = pd.Series(dict(zip(term_index, term_dims))) - - # initial axes are the axes of the largest-axis'd term - biggest = terms[ndims.idxmax()].value - typ = biggest._constructor - axes = biggest.axes - naxes = len(axes) - - for term in (terms[i] for i in term_index): - for axis, items in enumerate(term.value.axes): - if com.is_series(term.value) and naxes > 1: - ax, itm = naxes - 1, term.value.index - else: - ax, itm = axis, items - axes[ax] = axes[ax].join(itm, how='outer') - - for i, ndim in ndims.iteritems(): - for axis, items in izip(xrange(ndim), axes): - ti = terms[i].value - - if hasattr(ti, 'reindex_axis'): - transpose = com.is_series(ti) and naxes > 1 - - if transpose: - f = partial(ti.reindex, index=axes[naxes - 1], copy=False) - else: - f = partial(ti.reindex_axis, items, axis=axis, copy=False) - - if pd.lib.is_bool_array(ti.values): - r = f(fill_value=True) - else: - r = f() - - terms[i].update(r) - - res = _maybe_promote_shape(terms[i].value.T if transpose else - terms[i].value, naxes) - res = res.T if transpose else res - - try: - v = res.values - except AttributeError: - v = res - terms[i].update(v) - - return typ, _zip_axes_from_type(typ, axes) - - -def _filter_terms(flat): - # numeric literals - literals = set(filter(is_const, flat)) - - # these are strings which are variable names - names = set(flat) - literals - - # literals are not names and names are not literals, so intersection should - # be empty - if literals & names: - raise ValueError('literals cannot be names and names cannot be ' - 'literals') - return names, literals - - -def _align(terms, env): - # flatten the parse tree (a nested list) - terms = list(flatten(terms)) - - # separate names and literals - names, literals = _filter_terms(terms) - - if not names: # only literals so just promote to a common type - return np.result_type(*literals).type, None - - # if all resolved variables are numeric scalars - if all(term.isscalar for term in terms): - return np.result_type(*(term.value for term in terms)).type, None - - # perform the main alignment - typ, axes = _align_core(terms) - return typ, axes - - -def _reconstruct_object(typ, obj, axes): - """Reconstruct an object given its type, raw value, and possibly empty - (None) axes. - - Parameters - ---------- - typ : object - A type - obj : object - The value to use in the type constructor - axes : dict - The axes to use to construct the resulting pandas object - - Returns - ------- - reconst : typ - An object of type ``typ`` with the value `obj` and possible axes - `axes`. 
- """ - try: - # handle numpy dtypes - typ = typ.type - except AttributeError: - pass - - if (not isinstance(typ, partial) and - issubclass(typ, pd.core.generic.PandasObject)): - return typ(obj, **axes) - - ret_value = typ(obj) - - try: - return ret_value.item() - except (AttributeError, ValueError): - return ret_value +from pandas.computation.align import _align, _reconstruct_object class AbstractEngine(object): From 87957d24f08f09f2f4a8574e435d4a9dad75ec55 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:06:20 -0400 Subject: [PATCH 23/48] CLN: clean up and use new stringmixin for Expr --- pandas/computation/expr.py | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 987f694bf0904..777ac2a03beea 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -2,11 +2,11 @@ import sys from functools import partial - -from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops, Mod +from pandas.core.base import StringMixin +from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops from pandas.computation.ops import _cmp_ops_syms, _bool_ops_syms from pandas.computation.ops import _arith_ops_syms, _unary_ops_syms -from pandas.computation.ops import _resolve_name, Term, Constant +from pandas.computation.ops import Term, Constant class Scope(object): @@ -51,8 +51,8 @@ def __init__(self, env): def visit(self, node): if not (isinstance(node, ast.AST) or isinstance(node, basestring)): - raise AssertionError('"node" must be an AST node or a string, you' - ' passed a(n) {0}'.format(node.__class__)) + raise TypeError('"node" must be an AST node or a string, you' + ' passed a(n) {0}'.format(node.__class__)) if isinstance(node, basestring): node = ast.fix_missing_locations(ast.parse(node)) return super(ExprVisitor, self).visit(node) @@ -81,8 +81,7 @@ def visit_UnaryOp(self, node): return op(self.visit(node.operand)) def visit_Name(self, node): - name = node.id - return Term(_resolve_name(self.env, name), name, self.env) + return Term(node.id, self.env) def visit_Num(self, node): return Constant(node.n, self.env) @@ -108,16 +107,14 @@ def visit_Call(self, node): def visit_Attribute(self, node): raise NotImplementedError("attribute access is not yet supported") - def visit_Mod(self, node): - return Mod - -class Expr(object): +class Expr(StringMixin): """Expr object""" - def __init__(self, expr, engine='numexpr', env=None, truediv=True): + def __init__(self, expr, engine='numexpr', env=None, truediv=True, + parsing='strict'): self.expr = expr self.env = env or Scope(frame_level=2) - self._visitor = ExprVisitor(self.env) + self._visitor = ExprVisitor(self.env, parsing) self.terms = self.parse() self.engine = engine self.truediv = truediv @@ -126,19 +123,12 @@ def __call__(self, env): env.locals['truediv'] = self.truediv return self.terms(env) - def __repr__(self): - return '{0} -> {1}'.format(self.expr, self.terms) - - def __str__(self): - return self.expr + def __unicode__(self): + return unicode(self.terms) def parse(self): """return a Termset""" - try: - visited = self._visitor.visit(self.expr) - except SyntaxError as e: - raise e - return visited + return self._visitor.visit(self.expr) def align(self): """align a set of Terms""" From e35cb5cf07b38390bdd66e583d0c98a0ae154193 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:06:36 -0400 Subject: [PATCH 24/48] ENH/CLN: be more careful about unicode --- 
pandas/computation/eval.py | 1 - pandas/computation/expr.py | 5 ++--- pandas/computation/ops.py | 12 +++++------- pandas/computation/tests/test_eval.py | 4 ++-- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index b7d15d1d009bc..e08e0f28d7877 100644 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -import sys import numbers import numpy as np diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 777ac2a03beea..60fea6e935070 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -110,11 +110,10 @@ def visit_Attribute(self, node): class Expr(StringMixin): """Expr object""" - def __init__(self, expr, engine='numexpr', env=None, truediv=True, - parsing='strict'): + def __init__(self, expr, engine='numexpr', env=None, truediv=True): self.expr = expr self.env = env or Scope(frame_level=2) - self._visitor = ExprVisitor(self.env, parsing) + self._visitor = ExprVisitor(self.env) self.terms = self.parse() self.engine = engine self.truediv = truediv diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 26774c17959fb..24000b27a033a 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -63,7 +63,7 @@ def __init__(self, name, env): self.type = type(self.value) def __unicode__(self): - return com.pprint_thing('{0}({1!r})'.format(self.typename, self.name)) + return com.pprint_thing(self.name) def update(self, value): _update_name(self.env, self.name, value) @@ -162,9 +162,8 @@ def __init__(self, op, lhs, rhs): ' operators are {1}'.format(op, keys)) def __unicode__(self): - return com.pprint_thing('{0}(op={1!r}, lhs={2!r}, ' - 'rhs={3!r})'.format(self.typename, self.op, - self.lhs, self.rhs)) + return com.pprint_thing('({0}) {1} ({2})'.format(self.lhs, self.op, + self.rhs)) def __call__(self, env): # handle truediv @@ -240,6 +239,5 @@ def __call__(self, env): return res def __unicode__(self): - return com.pprint_thing('{0}(op={1!r}, ' - 'operand={2!r})'.format(self.typename, self.op, - self.operand)) + return com.pprint_thing('{0}({1})'.format(self.op, self.operand)) + diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 15509e2e489df..0a1356915523a 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -17,8 +17,8 @@ from pandas.core import common as com from pandas import DataFrame, Series from pandas.util.testing import makeCustomDataframe as mkdf -from pandas.computation.engines import (_engines, _align_core, - _reconstruct_object) +from pandas.computation.engines import _engines, _reconstruct_object +from pandas.computation.align import _align_core from pandas.computation.ops import _binary_ops_dict, _unary_ops_dict, Term import pandas.computation.expr as expr from pandas.computation.expressions import _USE_NUMEXPR From 1ceec39bf7e983d0deec9a5dec2fe8583e411a5e Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:30:29 -0400 Subject: [PATCH 25/48] CLN: run autopep8 on pandas/io/pytables.py --- pandas/io/pytables.py | 811 +++++++++++++++++++++++++++--------------- 1 file changed, 520 insertions(+), 291 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4a538b22bf939..013e596320250 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -87,40 +87,40 @@ class AttributeConflictWarning(Warning): # map object types _TYPE_MAP = { - Series : u'series', - SparseSeries 
: u'sparse_series', - TimeSeries : u'series', - DataFrame : u'frame', - SparseDataFrame : u'sparse_frame', - Panel : u'wide', - Panel4D : u'ndim', - SparsePanel : u'sparse_panel' + Series: u'series', + SparseSeries: u'sparse_series', + TimeSeries: u'series', + DataFrame: u'frame', + SparseDataFrame: u'sparse_frame', + Panel: u'wide', + Panel4D: u'ndim', + SparsePanel: u'sparse_panel' } # storer class map _STORER_MAP = { - u'TimeSeries' : 'LegacySeriesStorer', - u'Series' : 'LegacySeriesStorer', - u'DataFrame' : 'LegacyFrameStorer', - u'DataMatrix' : 'LegacyFrameStorer', - u'series' : 'SeriesStorer', - u'sparse_series' : 'SparseSeriesStorer', - u'frame' : 'FrameStorer', - u'sparse_frame' : 'SparseFrameStorer', - u'wide' : 'PanelStorer', - u'sparse_panel' : 'SparsePanelStorer', + u'TimeSeries': 'LegacySeriesStorer', + u'Series': 'LegacySeriesStorer', + u'DataFrame': 'LegacyFrameStorer', + u'DataMatrix': 'LegacyFrameStorer', + u'series': 'SeriesStorer', + u'sparse_series': 'SparseSeriesStorer', + u'frame': 'FrameStorer', + u'sparse_frame': 'SparseFrameStorer', + u'wide': 'PanelStorer', + u'sparse_panel': 'SparsePanelStorer', } # table class map _TABLE_MAP = { - u'generic_table' : 'GenericTable', - u'appendable_frame' : 'AppendableFrameTable', - u'appendable_multiframe' : 'AppendableMultiFrameTable', - u'appendable_panel' : 'AppendablePanelTable', - u'appendable_ndim' : 'AppendableNDimTable', - u'worm' : 'WORMTable', - u'legacy_frame' : 'LegacyFrameTable', - u'legacy_panel' : 'LegacyPanelTable', + u'generic_table': 'GenericTable', + u'appendable_frame': 'AppendableFrameTable', + u'appendable_multiframe': 'AppendableMultiFrameTable', + u'appendable_panel': 'AppendablePanelTable', + u'appendable_ndim': 'AppendableNDimTable', + u'worm': 'WORMTable', + u'legacy_frame': 'LegacyFrameTable', + u'legacy_panel': 'LegacyPanelTable', } # axes map @@ -149,6 +149,7 @@ def _tables(): return _table_mod + def h5_open(path, mode): tables = _tables() return tables.openFile(path, mode) @@ -180,7 +181,7 @@ def get_store(path, mode='a', complevel=None, complib=None, store.close() -### interface to/from ### +# interface to/from ### def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, append=None, **kwargs): @@ -197,9 +198,11 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, else: f(path_or_buf) + def read_hdf(path_or_buf, key, **kwargs): """ read from the store, closeit if we opened it """ - f = lambda store, auto_close: store.select(key, auto_close=auto_close, **kwargs) + f = lambda store, auto_close: store.select( + key, auto_close=auto_close, **kwargs) if isinstance(path_or_buf, basestring): @@ -221,7 +224,9 @@ def read_hdf(path_or_buf, key, **kwargs): # a passed store; user controls open/close f(path_or_buf, False) + class HDFStore(object): + """ dict-like IO interface for storing pandas objects in PyTables format. 
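
For orientation while reading the reformatted class, a minimal sketch of the dict-like interface it exposes; the file name and frame are illustrative, while put, append, keys and the get_store context manager all appear with these signatures elsewhere in this series:

    import pandas as pd
    from pandas.io.pytables import get_store

    df = pd.DataFrame({'A': list(range(5))})

    with get_store('demo.h5', mode='w') as store:  # hypothetical path
        store.put('fixed', df)    # fixed-format storer, whole-object reads
        store.append('tbl', df)   # table format, supports iterator/chunk reads
        print(store.keys())       # ['/fixed', '/tbl']
        roundtripped = store.select('tbl')

Only table-format nodes support iterators, chunksize, and where-based selection, which is exactly the "can only use an iterator or chunksize on a table" restriction enforced in select in this series.
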
@@ -322,7 +327,7 @@ def __unicode__(self): output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) if len(self.keys()): - keys = [] + keys = [] values = [] for k in self.keys(): @@ -330,10 +335,13 @@ def __unicode__(self): s = self.get_storer(k) if s is not None: keys.append(pprint_thing(s.pathname or k)) - values.append(pprint_thing(s or 'invalid_HDFStore node')) + values.append( + pprint_thing(s or 'invalid_HDFStore node')) except Exception as detail: keys.append(k) - values.append("[invalid_HDFStore node: %s]" % pprint_thing(detail)) + values.append( + "[invalid_HDFStore node: %s]" % + pprint_thing(detail)) output += adjoin(12, keys, values) else: @@ -387,7 +395,7 @@ def open(self, mode='a', warn=True): try: self._handle = h5_open(self._path, self._mode) - except IOError, e: # pragma: no cover + except IOError as e: # pragma: no cover if 'can not be written' in str(e): print ('Opening %s in read-only mode' % self._path) self._handle = h5_open(self._path, 'r') @@ -456,7 +464,8 @@ def func(_start, _stop): if iterator or chunksize is not None: if not s.is_table: - raise TypeError("can only use an iterator or chunksize on a table") + raise TypeError( + "can only use an iterator or chunksize on a table") return TableIterator(self, func, nrows=s.nrows, start=start, stop=stop, chunksize=chunksize, auto_close=auto_close) @@ -464,7 +473,8 @@ def func(_start, _stop): return TableIterator(self, func, nrows=s.nrows, start=start, stop=stop, auto_close=auto_close).get_values() - def select_as_coordinates(self, key, where=None, start=None, stop=None, **kwargs): + def select_as_coordinates( + self, key, where=None, start=None, stop=None, **kwargs): """ return the selection as a Coordinates. @@ -480,7 +490,7 @@ def select_as_coordinates(self, key, where=None, start=None, stop=None, **kwargs def unique(self, key, column, **kwargs): warnings.warn("unique(key,column) is deprecated\n" "use select_column(key,column).unique() instead") - return self.get_storer(key).read_column(column = column, **kwargs).unique() + return self.get_storer(key).read_column(column=column, **kwargs).unique() def select_column(self, key, column, **kwargs): """ @@ -497,9 +507,10 @@ def select_column(self, key, column, **kwargs): raises ValueError if the column can not be extracted indivually (it is part of a data block) """ - return self.get_storer(key).read_column(column = column, **kwargs) + return self.get_storer(key).read_column(column=column, **kwargs) - def select_as_multiple(self, keys, where=None, selector=None, columns=None, start=None, stop=None, iterator=False, chunksize=None, auto_close=False, **kwargs): + def select_as_multiple(self, keys, where=None, selector=None, columns=None, + start=None, stop=None, iterator=False, chunksize=None, auto_close=False, **kwargs): """ Retrieve pandas objects from multiple tables Parameters @@ -533,7 +544,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star selector = keys[0] # collect the tables - tbls = [ self.get_storer(k) for k in keys ] + tbls = [self.get_storer(k) for k in keys] # validate rows nrows = None @@ -541,24 +552,32 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star if t is None: raise TypeError("Invalid table [%s]" % k) if not t.is_table: - raise TypeError("object [%s] is not a table, and cannot be used in all select as multiple" % t.pathname) + raise TypeError( + "object [%s] is not a table, and cannot be used in all select as multiple" % + t.pathname) if nrows is None: nrows = t.nrows 
elif t.nrows != nrows: - raise ValueError("all tables must have exactly the same nrows!") + raise ValueError( + "all tables must have exactly the same nrows!") # select coordinates from the selector table try: - c = self.select_as_coordinates(selector, where, start=start, stop=stop) + c = self.select_as_coordinates( + selector, + where, + start=start, + stop=stop) nrows = len(c) - except (Exception), detail: + except (Exception) as detail: raise ValueError("invalid selector [%s]" % selector) def func(_start, _stop): # collect the returns objs - objs = [t.read(where=c[_start:_stop], columns=columns) for t in tbls] + objs = [t.read(where=c[_start:_stop], columns=columns) + for t in tbls] # axis is the concentation axes axis = list(set([t.non_index_axes[0][0] for t in tbls]))[0] @@ -571,7 +590,6 @@ def func(_start, _stop): return TableIterator(self, func, nrows=nrows, start=start, stop=stop, auto_close=auto_close).get_values() - def put(self, key, value, table=None, append=False, **kwargs): """ Store object in HDFStore @@ -617,7 +635,8 @@ def remove(self, key, where=None, start=None, stop=None): except: if where is not None: - raise ValueError("trying to remove a node with a non-None where clause!") + raise ValueError( + "trying to remove a node with a non-None where clause!") # we are actually trying to remove a node (with children) s = self.get_node(key) @@ -635,8 +654,9 @@ def remove(self, key, where=None, start=None, stop=None): # delete from the table else: if not s.is_table: - raise ValueError('can only remove with where on objects written as tables') - return s.delete(where = where, start=start, stop=stop) + raise ValueError( + 'can only remove with where on objects written as tables') + return s.delete(where=where, start=start, stop=stop) def append(self, key, value, columns=None, **kwargs): """ @@ -660,11 +680,13 @@ def append(self, key, value, columns=None, **kwargs): data in the table, so be careful """ if columns is not None: - raise Exception("columns is not a supported keyword in append, try data_columns") + raise Exception( + "columns is not a supported keyword in append, try data_columns") self._write_to_group(key, value, table=True, append=True, **kwargs) - def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, **kwargs): + def append_to_multiple( + self, d, value, selector, data_columns=None, axes=None, **kwargs): """ Append to multiple tables @@ -683,13 +705,16 @@ def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, * """ if axes is not None: - raise Exception("axes is currently not accepted as a paremter to append_to_multiple; you can create the tables indepdently instead") + raise Exception( + "axes is currently not accepted as a paremter to append_to_multiple; you can create the tables indepdently instead") if not isinstance(d, dict): - raise ValueError("append_to_multiple must have a dictionary specified as the way to split the value") + raise ValueError( + "append_to_multiple must have a dictionary specified as the way to split the value") if selector not in d: - raise ValueError("append_to_multiple requires a selector that is in passed dict") + raise ValueError( + "append_to_multiple requires a selector that is in passed dict") # figure out the splitting axis (the non_index_axis) axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0] @@ -700,7 +725,8 @@ def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, * for k, v in d.items(): if v is None: if remain_key is not None: - raise 
ValueError("append_to_multiple can only have one value in d that is None") + raise ValueError( + "append_to_multiple can only have one value in d that is None") remain_key = k else: remain_values.extend(v) @@ -741,7 +767,8 @@ def create_table_index(self, key, **kwargs): raise Exception("PyTables >= 2.3 is required for table indexing") s = self.get_storer(key) - if s is None: return + if s is None: + return if not s.is_table: raise TypeError("cannot create table index on a non-table") @@ -750,8 +777,8 @@ def create_table_index(self, key, **kwargs): def groups(self): """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """ _tables() - return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr( - g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u'table') ] + return [g for g in self._handle.walkNodes() if getattr(g._v_attrs, 'pandas_type', None) or getattr( + g, 'table', None) or (isinstance(g, _table_mod.table.Table) and g._v_name != u'table')] def get_node(self, key): """ return the node with the key or None if it does not exist """ @@ -771,8 +798,9 @@ def get_storer(self, key): s.infer_axes() return s - def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None, complevel = None, - fletcher32 = False, overwrite = True): + def copy( + self, file, mode='w', propindexes=True, keys=None, complib=None, complevel=None, + fletcher32=False, overwrite=True): """ copy the existing store to a new file, upgrading in place Parameters @@ -787,13 +815,18 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None open file handle of the new store """ - new_store = HDFStore(file, mode = mode, complib = complib, complevel = complevel, fletcher32 = fletcher32) + new_store = HDFStore( + file, + mode=mode, + complib=complib, + complevel=complevel, + fletcher32=fletcher32) if keys is None: keys = self.keys() - if not isinstance(keys, (tuple,list)): - keys = [ keys ] + if not isinstance(keys, (tuple, list)): + keys = [keys] for k in keys: - s = self.get_storer(k) + s = self.get_storer(k) if s is not None: if k in new_store: @@ -805,35 +838,45 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None index = False if propindexes: - index = [ a.name for a in s.axes if a.is_indexed ] - new_store.append(k, data, index=index, data_columns=getattr(s,'data_columns',None), encoding=s.encoding) + index = [a.name for a in s.axes if a.is_indexed] + new_store.append( + k, + data, + index=index, + data_columns=getattr( + s, + 'data_columns', + None), + encoding=s.encoding) else: new_store.put(k, data, encoding=s.encoding) return new_store - ###### private methods ###### + # private methods ###### - def _create_storer(self, group, value = None, table = False, append = False, **kwargs): + def _create_storer( + self, group, value=None, table=False, append=False, **kwargs): """ return a suitable Storer class to operate """ def error(t): raise TypeError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" % - (t,group,type(value),table,append,kwargs)) + (t, group, type(value), table, append, kwargs)) - pt = _ensure_decoded(getattr(group._v_attrs,'pandas_type',None)) - tt = _ensure_decoded(getattr(group._v_attrs,'table_type',None)) + pt = _ensure_decoded(getattr(group._v_attrs, 'pandas_type', None)) + tt = _ensure_decoded(getattr(group._v_attrs, 'table_type', None)) # infer the pt from the passed value if pt 
is None: if value is None: _tables() - if getattr(group,'table',None) or isinstance(group,_table_mod.table.Table): + if getattr(group, 'table', None) or isinstance(group, _table_mod.table.Table): pt = u'frame_table' tt = u'generic_table' else: - raise TypeError("cannot create a storer if the object is not existing nor a value are passed") + raise TypeError( + "cannot create a storer if the object is not existing nor a value are passed") else: try: @@ -859,14 +902,14 @@ def error(t): if value is not None: if pt == u'frame_table': - index = getattr(value,'index',None) + index = getattr(value, 'index', None) if index is not None: if index.nlevels == 1: tt = u'appendable_frame' elif index.nlevels > 1: tt = u'appendable_multiframe' elif pt == u'wide_table': - tt = u'appendable_panel' + tt = u'appendable_panel' elif pt == u'ndim_table': tt = u'appendable_ndim' @@ -886,8 +929,9 @@ def error(t): except: error('_TABLE_MAP') - def _write_to_group(self, key, value, index=True, table=False, append=False, - complib=None, encoding=None, **kwargs): + def _write_to_group( + self, key, value, index=True, table=False, append=False, + complib=None, encoding=None, **kwargs): group = self.get_node(key) # remove the node if we are not appending @@ -927,16 +971,18 @@ def _write_to_group(self, key, value, index=True, table=False, append=False, if not s.is_table and complib: raise ValueError('Compression not supported on non-table') - s.write(obj = value, append=append, complib=complib, **kwargs) + s.write(obj=value, append=append, complib=complib, **kwargs) if s.is_table and index: - s.create_index(columns = index) + s.create_index(columns=index) def _read_group(self, group, **kwargs): s = self._create_storer(group) s.infer_axes() return s.read(**kwargs) + class TableIterator(object): + """ define the iteration interface on a table Parameters @@ -953,15 +999,16 @@ class TableIterator(object): kwargs : the passed kwargs """ - def __init__(self, store, func, nrows, start=None, stop=None, chunksize=None, auto_close=False): + def __init__(self, store, func, nrows, start=None, + stop=None, chunksize=None, auto_close=False): self.store = store - self.func = func + self.func = func self.nrows = nrows or 0 self.start = start or 0 if stop is None: stop = self.nrows - self.stop = min(self.nrows,stop) + self.stop = min(self.nrows, stop) if chunksize is None: chunksize = 100000 @@ -992,7 +1039,9 @@ def get_values(self): self.close() return results + class IndexCol(StringMixin): + """ an index column description class Parameters @@ -1008,11 +1057,12 @@ class IndexCol(StringMixin): is_an_indexable = True is_data_indexable = True is_searchable = False - _info_fields = ['freq','tz','index_name'] + _info_fields = ['freq', 'tz', 'index_name'] - def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None, - name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None, - index_name=None, **kwargs): + def __init__( + self, values=None, kind=None, typ=None, cname=None, itemsize=None, + name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None, + index_name=None, **kwargs): self.values = values self.kind = kind self.typ = typ @@ -1059,7 +1109,13 @@ def set_table(self, table): return self def __unicode__(self): - temp = tuple(map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind))) + temp = tuple( + map(pprint_thing, + (self.name, + self.cname, + self.axis, + self.pos, + self.kind))) return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % temp def __eq__(self, other): @@ -1073,7 
+1129,7 @@ def __ne__(self, other): def is_indexed(self): """ return whether I am an indexed column """ try: - return getattr(self.table.cols,self.cname).is_indexed + return getattr(self.table.cols, self.cname).is_indexed except: False @@ -1095,7 +1151,7 @@ def convert(self, values, nan_rep, encoding): except: pass - values =_maybe_convert(values, self.kind, encoding) + values = _maybe_convert(values, self.kind, encoding) kwargs = dict() if self.freq is not None: @@ -1106,15 +1162,22 @@ def convert(self, values, nan_rep, encoding): self.values = Index(values, **kwargs) except: - # if the output freq is different that what we recorded, then infer it + # if the output freq is different that what we recorded, then infer + # it if 'freq' in kwargs: kwargs['freq'] = 'infer' - self.values = Index(_maybe_convert(values, self.kind, encoding), **kwargs) + self.values = Index( + _maybe_convert( + values, + self.kind, + encoding), + **kwargs) # set the timezone if indicated # we stored in utc, so reverse to local timezone if self.tz is not None: - self.values = self.values.tz_localize('UTC').tz_convert(_ensure_decoded(self.tz)) + self.values = self.values.tz_localize( + 'UTC').tz_convert(_ensure_decoded(self.tz)) return self @@ -1177,7 +1240,7 @@ def validate_col(self, itemsize=None): raise ValueError("Trying to store a string with len [%s] in [%s] column but\n" "this column has a limit of [%s]!\n" "Consider using min_itemsize to preset the sizes on these columns" - % (itemsize,self.cname, c.itemsize)) + % (itemsize, self.cname, c.itemsize)) return c.itemsize return None @@ -1196,7 +1259,7 @@ def update_info(self, info): for key in self._info_fields: - value = getattr(self,key,None) + value = getattr(self, key, None) try: idx = info[self.name] @@ -1207,18 +1270,18 @@ def update_info(self, info): if key in idx and value is not None and existing_value != value: # frequency/name just warn - if key in ['freq','index_name']: - ws = attribute_conflict_doc % (key,existing_value,value) + if key in ['freq', 'index_name']: + ws = attribute_conflict_doc % (key, existing_value, value) warnings.warn(ws, AttributeConflictWarning) # reset idx[key] = None - setattr(self,key,None) + setattr(self, key, None) else: raise ValueError("invalid info for [%s] for [%s]""" ", existing_value [%s] conflicts with new value [%s]" % (self.name, - key,existing_value,value)) + key, existing_value, value)) else: if value is not None or existing_value is not None: idx[key] = value @@ -1239,7 +1302,9 @@ def set_attr(self): """ set the kind for this colummn """ setattr(self.attrs, self.kind_attr, self.kind) + class GenericIndexCol(IndexCol): + """ an index which is not represented in the data of the table """ @property @@ -1258,7 +1323,9 @@ def get_attr(self): def set_attr(self): pass + class DataCol(IndexCol): + """ a data holding column, by definition this is not indexable Parameters @@ -1273,7 +1340,8 @@ class DataCol(IndexCol): _info_fields = ['tz'] @classmethod - def create_for_block(cls, i=None, name=None, cname=None, version=None, **kwargs): + def create_for_block( + cls, i=None, name=None, cname=None, version=None, **kwargs): """ return a new datacol with the block i """ if cname is None: @@ -1293,7 +1361,8 @@ def create_for_block(cls, i=None, name=None, cname=None, version=None, **kwargs) return cls(name=name, cname=cname, **kwargs) - def __init__(self, values=None, kind=None, typ=None, cname=None, data=None, block=None, **kwargs): + def __init__(self, values=None, kind=None, typ=None, + cname=None, data=None, block=None, 
**kwargs): super(DataCol, self).__init__( values=values, kind=kind, typ=typ, cname=cname, **kwargs) self.dtype = None @@ -1337,13 +1406,16 @@ def set_kind(self): elif dtype.startswith(u'bool'): self.kind = 'bool' else: - raise AssertionError("cannot interpret dtype of [%s] in [%s]" % (dtype,self)) + raise AssertionError( + "cannot interpret dtype of [%s] in [%s]" % + (dtype, self)) # set my typ if we need if self.typ is None: - self.typ = getattr(self.description,self.cname,None) + self.typ = getattr(self.description, self.cname, None) - def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=None, **kwargs): + def set_atom(self, block, existing_col, min_itemsize, + nan_rep, info, encoding=None, **kwargs): """ create and setup my atom from the block b """ self.values = list(block.items) @@ -1357,7 +1429,7 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=No raise TypeError( "[date] is not implemented as a table column") elif inferred_type == 'datetime': - if getattr(rvalues[0],'tzinfo',None) is not None: + if getattr(rvalues[0], 'tzinfo', None) is not None: # if this block has more than one timezone, raise if len(set([r.tzinfo for r in rvalues])) != 1: @@ -1366,7 +1438,7 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=No # convert this column to datetime64[ns] utc, and save the tz index = DatetimeIndex(rvalues) - tz = getattr(index,'tz',None) + tz = getattr(index, 'tz', None) if tz is None: raise TypeError( "invalid timezone specification") @@ -1380,7 +1452,9 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=No self.tz = zone self.update_info(info) - self.set_atom_datetime64(block, values.reshape(block.values.shape)) + self.set_atom_datetime64( + block, + values.reshape(block.values.shape)) else: raise TypeError( @@ -1392,7 +1466,12 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=No # this is basically a catchall; if say a datetime64 has nans then will # end up here ### elif inferred_type == 'string' or dtype == 'object': - self.set_atom_string(block, existing_col, min_itemsize, nan_rep, encoding) + self.set_atom_string( + block, + existing_col, + min_itemsize, + nan_rep, + encoding) else: self.set_atom_data(block) @@ -1401,16 +1480,18 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=No def get_atom_string(self, block, itemsize): return _tables().StringCol(itemsize=itemsize, shape=block.shape[0]) - def set_atom_string(self, block, existing_col, min_itemsize, nan_rep, encoding): + def set_atom_string( + self, block, existing_col, min_itemsize, nan_rep, encoding): # fill nan items with myself block = block.fillna(nan_rep) - data = block.values + data = block.values # see if we have a valid string type inferred_type = lib.infer_dtype(data.ravel()) if inferred_type != 'string': - # we cannot serialize this data, so report an exception on a column by column basis + # we cannot serialize this data, so report an exception on a column + # by column basis for item in block.items: col = block.get(item) @@ -1418,8 +1499,7 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep, encoding): if inferred_type != 'string': raise TypeError("Cannot serialize the column [%s] because\n" "its data contents are [%s] object dtype" % - (item,inferred_type)) - + (item, inferred_type)) # itemsize is the maximum length of a string (along any dimension) itemsize = lib.max_len_string_array(com._ensure_object(data.ravel())) 
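
The string-column sizing logic in set_atom_string above is what the min_itemsize keyword feeds; a short sketch of the intended use (the file name and frames are hypothetical, while the keyword itself appears verbatim in the tests earlier in this series):

    import pandas as pd
    from pandas.io.pytables import HDFStore

    store = HDFStore('strings.h5', mode='w')
    # reserve 30 bytes per value in column A up front so that longer strings
    # can be appended later without failing validate_col's itemsize check
    store.append('df', pd.DataFrame({'A': ['ab', 'cd']}),
                 min_itemsize={'A': 30})
    store.append('df', pd.DataFrame({'A': ['a much longer string here']}))
    store.close()

Without the preset, the second append would raise the "Consider using min_itemsize" ValueError from validate_col shown above.
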
@@ -1464,7 +1544,7 @@ def set_atom_data(self, block): def get_atom_datetime64(self, block): return _tables().Int64Col(shape=block.shape[0]) - def set_atom_datetime64(self, block, values = None): + def set_atom_datetime64(self, block, values=None): self.kind = 'datetime64' self.typ = self.get_atom_datetime64(block) if values is None: @@ -1487,13 +1567,13 @@ def validate_attr(self, append): if (existing_fields is not None and existing_fields != list(self.values)): raise ValueError("appended items do not match existing items" - " in table!") + " in table!") existing_dtype = getattr(self.attrs, self.dtype_attr, None) if (existing_dtype is not None and existing_dtype != self.dtype): raise ValueError("appended items dtype do not match existing items dtype" - " in table!") + " in table!") def convert(self, values, nan_rep, encoding): """ set the data from this selection (and convert to the correct dtype if we can) """ @@ -1515,8 +1595,12 @@ def convert(self, values, nan_rep, encoding): # data should be 2-dim here # we stored as utc, so just set the tz - index = DatetimeIndex(self.data.ravel(),tz='UTC').tz_convert(self.tz) - self.data = np.array(index.tolist(),dtype=object).reshape(self.data.shape) + index = DatetimeIndex( + self.data.ravel(), + tz='UTC').tz_convert(self.tz) + self.data = np.array( + index.tolist(), + dtype=object).reshape(self.data.shape) else: self.data = np.asarray(self.data, dtype='M8[ns]') @@ -1537,14 +1621,17 @@ def convert(self, values, nan_rep, encoding): # convert nans / decode if _ensure_decoded(self.kind) == u'string': - self.data = _unconvert_string_array(self.data, nan_rep=nan_rep, encoding=encoding) + self.data = _unconvert_string_array( + self.data, + nan_rep=nan_rep, + encoding=encoding) return self def get_attr(self): """ get the data for this colummn """ self.values = getattr(self.attrs, self.kind_attr, None) - self.dtype = getattr(self.attrs, self.dtype_attr, None) + self.dtype = getattr(self.attrs, self.dtype_attr, None) self.set_kind() def set_attr(self): @@ -1555,6 +1642,7 @@ def set_attr(self): class DataIndexableCol(DataCol): + """ represent a data column that can be indexed """ is_data_indexable = True @@ -1571,13 +1659,17 @@ def get_atom_data(self, block): def get_atom_datetime64(self, block): return _tables().Int64Col() + class GenericDataIndexableCol(DataIndexableCol): + """ represent a generic pytables data column """ def get_attr(self): pass + class Storer(StringMixin): + """ represent an object in my store facilitate read/write of various types of objects this is an abstract base class @@ -1589,14 +1681,14 @@ class Storer(StringMixin): group : the group node where the table resides """ pandas_kind = None - obj_type = None - ndim = None - is_table = False + obj_type = None + ndim = None + is_table = False def __init__(self, parent, group, encoding=None, **kwargs): - self.parent = parent - self.group = group - self.encoding = _ensure_encoding(encoding) + self.parent = parent + self.group = group + self.encoding = _ensure_encoding(encoding) self.set_version() @property @@ -1605,7 +1697,11 @@ def is_old_version(self): def set_version(self): """ compute and set our version """ - version = _ensure_decoded(getattr(self.group._v_attrs,'pandas_version',None)) + version = _ensure_decoded( + getattr( + self.group._v_attrs, + 'pandas_version', + None)) try: self.version = tuple([int(x) for x in version.split('.')]) if len(self.version) == 2: @@ -1622,9 +1718,9 @@ def __unicode__(self): self.infer_axes() s = self.shape if s is not None: - if isinstance(s, 
(list,tuple)): + if isinstance(s, (list, tuple)): s = "[%s]" % ','.join([pprint_thing(x) for x in s]) - return "%-12.12s (shape->%s)" % (self.pandas_type,s) + return "%-12.12s (shape->%s)" % (self.pandas_type, s) return self.pandas_type def __str__(self): @@ -1695,14 +1791,15 @@ def is_exists(self): @property def nrows(self): - return getattr(self.storable,'nrows',None) + return getattr(self.storable, 'nrows', None) def validate(self, other): """ validate against an existing storable """ - if other is None: return + if other is None: + return return True - def validate_version(self, where = None): + def validate_version(self, where=None): """ are we trying to operate on an old version? """ return True @@ -1717,12 +1814,14 @@ def infer_axes(self): return True def read(self, **kwargs): - raise NotImplementedError("cannot read on an abstract storer: subclasses should implement") + raise NotImplementedError( + "cannot read on an abstract storer: subclasses should implement") def write(self, **kwargs): - raise NotImplementedError("cannot write on an abstract storer: sublcasses should implement") + raise NotImplementedError( + "cannot write on an abstract storer: subclasses should implement") - def delete(self, where = None, **kwargs): + def delete(self, where=None, **kwargs): """ support fully deleting the node in its entirety (only) - where specification must be None """ if where is None: self._handle.removeNode(self.group, recursive=True) @@ -1730,11 +1829,14 @@ def delete(self, where = None, **kwargs): raise TypeError("cannot delete on an abstract storer") + class GenericStorer(Storer): + """ a generified storer version """ - _index_type_map = { DatetimeIndex: 'datetime', - PeriodIndex: 'period'} - _reverse_index_map = dict([ (v,k) for k, v in _index_type_map.iteritems() ]) + _index_type_map = {DatetimeIndex: 'datetime', + PeriodIndex: 'period'} + _reverse_index_map = dict([(v, k) + for k, v in _index_type_map.iteritems()]) attributes = [] # indexer helpers @@ -1756,9 +1858,11 @@ def f(values, freq=None, tz=None): def validate_read(self, kwargs): if kwargs.get('columns') is not None: - raise TypeError("cannot pass a column specification when reading a Storer") + raise TypeError( + "cannot pass a column specification when reading a Storer") if kwargs.get('where') is not None: - raise TypeError("cannot pass a where specification when reading a Storer") + raise TypeError( + "cannot pass a where specification when reading a Storer") @property def is_exists(self): @@ -1770,9 +1874,9 @@ def set_attrs(self): def get_attrs(self): """ retrieve our attributes """ - self.encoding = _ensure_encoding(getattr(self.attrs,'encoding',None)) + self.encoding = _ensure_encoding(getattr(self.attrs, 'encoding', None)) for n in self.attributes: - setattr(self,n,_ensure_decoded(getattr(self.attrs, n, None))) + setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None))) def write(self, obj, **kwargs): self.set_attrs() @@ -1833,7 +1937,7 @@ def write_index(self, key, index): self.write_sparse_intindex(key, index) else: setattr(self.attrs, '%s_variety' % key, 'regular') - converted = _convert_index(index,self.encoding).set_name('index') + converted = _convert_index(index, self.encoding).set_name('index') self.write_array(key, converted.values) node = getattr(self.group, key) node._v_attrs.kind = converted.kind @@ -1851,7 +1955,6 @@ def write_index(self, key, index): zone = tslib.tot_seconds(index.tz.utcoffset()) node._v_attrs.tz = zone - def write_block_index(self, key, index): self.write_array('%s_blocs' % key,
index.blocs) self.write_array('%s_blengths' % key, index.blengths) @@ -1931,10 +2034,15 @@ def read_index_node(self, node): kwargs['tz'] = node._v_attrs['tz'] if kind in (u'date', u'datetime'): - index = factory(_unconvert_index(data, kind, encoding=self.encoding), dtype=object, - **kwargs) + index = factory( + _unconvert_index(data, kind, encoding=self.encoding), dtype=object, + **kwargs) else: - index = factory(_unconvert_index(data, kind, encoding=self.encoding), **kwargs) + index = factory( + _unconvert_index(data, + kind, + encoding=self.encoding), + **kwargs) index.name = name @@ -1985,7 +2093,8 @@ def write_array(self, key, value, items=None): if value.dtype.type == np.object_: - # infer the type, warn if we have a non-string type here (for performance) + # infer the type, warn if we have a non-string type here (for + # performance) inferred_type = lib.infer_dtype(value.ravel()) if empty_array: pass @@ -1996,11 +2105,11 @@ def write_array(self, key, value, items=None): items = list(items) except: pass - ws = performance_doc % (inferred_type,key,items) + ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning) vlarr = self._handle.createVLArray(self.group, key, - _tables().ObjectAtom()) + _tables().ObjectAtom()) vlarr.append(value) elif value.dtype.type == np.datetime64: self._handle.createArray(self.group, key, value.view('i8')) @@ -2013,14 +2122,16 @@ def write_array(self, key, value, items=None): getattr(self.group, key)._v_attrs.transposed = transposed + class LegacyStorer(GenericStorer): def read_index_legacy(self, key): - node = getattr(self.group,key) + node = getattr(self.group, key) data = node[:] kind = node._v_attrs.kind return _unconvert_index_legacy(data, kind, encoding=self.encoding) + class LegacySeriesStorer(LegacyStorer): def read(self, **kwargs): @@ -2029,6 +2140,7 @@ def read(self, **kwargs): values = self.read_array('values') return Series(values, index=index) + class LegacyFrameStorer(LegacyStorer): def read(self, **kwargs): @@ -2038,6 +2150,7 @@ def read(self, **kwargs): values = self.read_array('values') return DataFrame(values, index=index, columns=columns) + class SeriesStorer(GenericStorer): pandas_kind = u'series' attributes = ['name'] @@ -2045,7 +2158,7 @@ class SeriesStorer(GenericStorer): @property def shape(self): try: - return len(getattr(self.group,'values')), + return len(getattr(self.group, 'values')), except: return None @@ -2065,9 +2178,10 @@ def write(self, obj, **kwargs): self.write_array('values', obj.values) self.attrs.name = obj.name + class SparseSeriesStorer(GenericStorer): pandas_kind = u'sparse_series' - attributes = ['name','fill_value','kind'] + attributes = ['name', 'fill_value', 'kind'] def read(self, **kwargs): self.validate_read(kwargs) @@ -2087,9 +2201,10 @@ def write(self, obj, **kwargs): self.attrs.fill_value = obj.fill_value self.attrs.kind = obj.kind + class SparseFrameStorer(GenericStorer): pandas_kind = u'sparse_frame' - attributes = ['default_kind','default_fill_value'] + attributes = ['default_kind', 'default_fill_value'] def read(self, **kwargs): self.validate_read(kwargs) @@ -2097,7 +2212,7 @@ def read(self, **kwargs): sdict = {} for c in columns: key = 'sparse_series_%s' % c - s = SparseSeriesStorer(self.parent, getattr(self.group,key)) + s = SparseSeriesStorer(self.parent, getattr(self.group, key)) s.infer_axes() sdict[c] = s.read() return SparseDataFrame(sdict, columns=columns, @@ -2116,12 +2231,13 @@ def write(self, obj, **kwargs): s = SparseSeriesStorer(self.parent, node) s.write(ss) 
self.attrs.default_fill_value = obj.default_fill_value - self.attrs.default_kind = obj.default_kind + self.attrs.default_kind = obj.default_kind self.write_index('columns', obj.columns) + class SparsePanelStorer(GenericStorer): pandas_kind = u'sparse_panel' - attributes = ['default_kind','default_fill_value'] + attributes = ['default_kind', 'default_fill_value'] def read(self, **kwargs): self.validate_read(kwargs) @@ -2131,7 +2247,7 @@ def read(self, **kwargs): for name in items: key = 'sparse_frame_%s' % name node = getattr(self.group, key) - s = SparseFrameStorer(self.parent, getattr(self.group,key)) + s = SparseFrameStorer(self.parent, getattr(self.group, key)) s.infer_axes() sdict[name] = s.read() return SparsePanel(sdict, items=items, default_kind=self.default_kind, @@ -2140,7 +2256,7 @@ def write(self, obj, **kwargs): super(SparsePanelStorer, self).write(obj, **kwargs) self.attrs.default_fill_value = obj.default_fill_value - self.attrs.default_kind = obj.default_kind + self.attrs.default_kind = obj.default_kind self.write_index('items', obj.items) for name, sdf in obj.iterkv(): @@ -2152,8 +2268,9 @@ def write(self, obj, **kwargs): s = SparseFrameStorer(self.parent, node) s.write(sdf) + class BlockManagerStorer(GenericStorer): - attributes = ['ndim','nblocks'] + attributes = ['ndim', 'nblocks'] is_shape_reversed = False @property @@ -2165,15 +2282,15 @@ def shape(self): items = 0 for i in range(self.nblocks): node = getattr(self.group, 'block%d_items' % i) - shape = getattr(node,'shape',None) + shape = getattr(node, 'shape', None) if shape is not None: items += shape[0] # data shape node = getattr(self.group, 'block0_values') - shape = getattr(node,'shape',None) + shape = getattr(node, 'shape', None) if shape is not None: - shape = list(shape[0:(ndim-1)]) + shape = list(shape[0:(ndim - 1)]) else: shape = [] @@ -2223,20 +2340,24 @@ def write(self, obj, **kwargs): self.write_array('block%d_values' % i, blk.values, items=blk.items) self.write_index('block%d_items' % i, blk.items) + class FrameStorer(BlockManagerStorer): pandas_kind = u'frame' - obj_type = DataFrame + obj_type = DataFrame + class PanelStorer(BlockManagerStorer): pandas_kind = u'wide' - obj_type = Panel + obj_type = Panel is_shape_reversed = True def write(self, obj, **kwargs): obj._consolidate_inplace() return super(PanelStorer, self).write(obj, **kwargs) + class Table(Storer): + """ represent a table: facilitate read/write of various types of tables @@ -2254,20 +2375,20 @@ class Table(Storer): """ pandas_kind = u'wide_table' - table_type = None - levels = 1 - is_table = True + table_type = None + levels = 1 + is_table = True is_shape_reversed = False def __init__(self, *args, **kwargs): super(Table, self).__init__(*args, **kwargs) - self.index_axes = [] + self.index_axes = [] self.non_index_axes = [] - self.values_axes = [] - self.data_columns = [] - self.info = dict() - self.nan_rep = None - self.selection = None + self.values_axes = [] + self.data_columns = [] + self.info = dict() + self.nan_rep = None + self.selection = None @property def table_type_short(self): @@ -2276,18 +2397,21 @@ def table_type_short(self): def __repr__(self): """ return a pretty representation of myself """ self.infer_axes() - dc = ",dc->[%s]" % ','.join(self.data_columns) if len(self.data_columns) else '' + dc = ",dc->[%s]" % ','.join( + self.data_columns) if len( + self.data_columns) else '' ver = '' if self.is_old_version: - ver = "[%s]" % '.'.join([ str(x) for x in self.version ]) + ver = "[%s]" % '.'.join([str(x)
for x in self.version]) return "%-12.12s%s (typ->%s,nrows->%s,ncols->%s,indexers->[%s]%s)" % (self.pandas_type, ver, self.table_type_short, self.nrows, self.ncols, - ','.join([ a.name for a in self.index_axes ]), + ','.join( + [a.name for a in self.index_axes]), dc) def __getitem__(self, c): @@ -2299,30 +2423,35 @@ def __getitem__(self, c): def validate(self, other): """ validate against an existing table """ - if other is None: return + if other is None: + return if other.table_type != self.table_type: raise TypeError("incompatible table_type with existing [%s - %s]" % (other.table_type, self.table_type)) - for c in ['index_axes','non_index_axes','values_axes']: - sv = getattr(self,c,None) - ov = getattr(other,c,None) + for c in ['index_axes', 'non_index_axes', 'values_axes']: + sv = getattr(self, c, None) + ov = getattr(other, c, None) if sv != ov: # show the error for the specific axes for i, sax in enumerate(sv): oax = ov[i] if sax != oax: - raise ValueError("invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c,sax,oax)) + raise ValueError( + "invalid combination of [%s] on appending data [%s] vs current table [%s]" % + (c, sax, oax)) # should never get here - raise Exception("invalid combinate of [%s] on appending data [%s] vs current table [%s]" % (c,sv,ov)) + raise Exception( + "invalid combination of [%s] on appending data [%s] vs current table [%s]" % + (c, sv, ov)) @property def nrows_expected(self): """ based on our axes, compute the expected nrows """ - return np.prod([ i.cvalues.shape[0] for i in self.index_axes ]) + return np.prod([i.cvalues.shape[0] for i in self.index_axes]) @property def is_exists(self): @@ -2331,7 +2460,7 @@ def is_exists(self): @property def storable(self): - return getattr(self.group,'table',None) + return getattr(self.group, 'table', None) @property def table(self): @@ -2353,7 +2482,7 @@ def axes(self): @property def ncols(self): """ the number of total columns in the values axes """ - return sum([ len(a.values) for a in self.values_axes ]) + return sum([len(a.values) for a in self.values_axes]) @property def is_transposed(self): @@ -2370,7 +2499,8 @@ def queryables(self): # compute the values_axes queryables return dict([(a.cname, a.kind) for a in self.index_axes] + [(self.obj_type._AXIS_NAMES[axis], None) for axis, values in self.non_index_axes] + - [(v.cname, v.kind) for v in self.values_axes if v.name in set(self.data_columns)] + [(v.cname, v.kind) + for v in self.values_axes if v.name in set(self.data_columns)] ) def index_cols(self): @@ -2383,44 +2513,62 @@ def values_cols(self): def set_info(self): """ update our table index info """ - self.attrs.info = self.info + self.attrs.info = self.info def set_attrs(self): """ set our table type & indexables """ - self.attrs.table_type = self.table_type - self.attrs.index_cols = self.index_cols() - self.attrs.values_cols = self.values_cols() + self.attrs.table_type = self.table_type + self.attrs.index_cols = self.index_cols() + self.attrs.values_cols = self.values_cols() self.attrs.non_index_axes = self.non_index_axes self.attrs.data_columns = self.data_columns - self.attrs.nan_rep = self.nan_rep - self.attrs.encoding = self.encoding - self.attrs.levels = self.levels + self.attrs.nan_rep = self.nan_rep + self.attrs.encoding = self.encoding + self.attrs.levels = self.levels self.set_info() def get_attrs(self): """ retrieve our attributes """ - self.non_index_axes = getattr(self.attrs,'non_index_axes',None) or [] - self.data_columns = getattr(self.attrs,'data_columns',None) or [] -
self.info = getattr(self.attrs,'info',None) or dict() - self.nan_rep = getattr(self.attrs,'nan_rep',None) - self.encoding = _ensure_encoding(getattr(self.attrs,'encoding',None)) - self.levels = getattr(self.attrs,'levels',None) or [] + self.non_index_axes = getattr( + self.attrs, + 'non_index_axes', + None) or [] + self.data_columns = getattr( + self.attrs, + 'data_columns', + None) or [] + self.info = getattr( + self.attrs, + 'info', + None) or dict() + self.nan_rep = getattr(self.attrs, 'nan_rep', None) + self.encoding = _ensure_encoding( + getattr(self.attrs, 'encoding', None)) + self.levels = getattr( + self.attrs, + 'levels', + None) or [] t = self.table - self.index_axes = [ a.infer(t) for a in self.indexables if a.is_an_indexable ] - self.values_axes = [ a.infer(t) for a in self.indexables if not a.is_an_indexable ] + self.index_axes = [a.infer(t) + for a in self.indexables if a.is_an_indexable] + self.values_axes = [a.infer(t) + for a in self.indexables if not a.is_an_indexable] - def validate_version(self, where = None): + def validate_version(self, where=None): """ are we trying to operate on an old version? """ if where is not None: if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1: - ws = incompatibility_doc % '.'.join([ str(x) for x in self.version ]) + ws = incompatibility_doc % '.'.join( + [str(x) for x in self.version]) warnings.warn(ws, IncompatibilityWarning) def validate_min_itemsize(self, min_itemsize): """ validate the min_itemsize doesn't contain items that are not in the axes this needs data_columns to be defined """ - if min_itemsize is None: return - if not isinstance(min_itemsize, dict): return + if min_itemsize is None: + return + if not isinstance(min_itemsize, dict): + return q = self.queryables() for k, v in min_itemsize.items(): @@ -2429,7 +2577,9 @@ def validate_min_itemsize(self, min_itemsize): if k == 'values': continue if k not in q: - raise ValueError("min_itemsize has the key [%s] which is not an axis or data_column" % k) + raise ValueError( + "min_itemsize has the key [%s] which is not an axis or data_column" % + k) @property def indexables(self): @@ -2440,7 +2590,8 @@ def indexables(self): self._indexables = [] # index columns - self._indexables.extend([ IndexCol(name=name,axis=axis,pos=i) for i, (axis, name) in enumerate(self.attrs.index_cols)]) + self._indexables.extend([IndexCol(name=name, axis=axis, pos=i) + for i, (axis, name) in enumerate(self.attrs.index_cols)]) # values columns dc = set(self.data_columns) @@ -2558,15 +2709,17 @@ def validate_data_columns(self, data_columns, min_itemsize): data_columns = [] # if min_itemsize is a dict, add the keys (exclude 'values') - if isinstance(min_itemsize,dict): + if isinstance(min_itemsize, dict): existing_data_columns = set(data_columns) - data_columns.extend([ k for k in min_itemsize.keys() if k != 'values' and k not in existing_data_columns ]) + data_columns.extend( + [k for k in min_itemsize.keys() if k != 'values' and k not in existing_data_columns]) # return valid columns in the order of our axis return [c for c in data_columns if c in axis_labels] - def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, min_itemsize=None, **kwargs): + def create_axes(self, axes, obj, validate=True, nan_rep=None, + data_columns=None, min_itemsize=None, **kwargs): """ create and return the axes legacy tables create an indexable column, indexable index, non-indexable fields @@ -2588,7 +2741,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None,
data_columns=None, axes = _AXES_MAP[type(obj)] except: raise TypeError("cannot properly create the storer for: [group->%s,value->%s]" % - (self.group._v_name,type(obj))) + (self.group._v_name, type(obj))) # map axes to numbers axes = [obj._get_axis_number(a) for a in axes] @@ -2597,17 +2750,18 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, if self.infer_axes(): existing_table = self.copy() existing_table.infer_axes() - axes = [ a.axis for a in existing_table.index_axes] - data_columns = existing_table.data_columns - nan_rep = existing_table.nan_rep + axes = [a.axis for a in existing_table.index_axes] + data_columns = existing_table.data_columns + nan_rep = existing_table.nan_rep self.encoding = existing_table.encoding - self.info = copy.copy(existing_table.info) + self.info = copy.copy(existing_table.info) else: existing_table = None # currently support on ndim-1 axes if len(axes) != self.ndim - 1: - raise ValueError("currently only support ndim-1 indexers in an AppendableTable") + raise ValueError( + "currently only support ndim-1 indexers in an AppendableTable") # create according to the new data self.non_index_axes = [] @@ -2644,8 +2798,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, self.non_index_axes.append((i, append_axis)) # set axis positions (based on the axes) - self.index_axes = [index_axes_map[a].set_pos(j).update_info(self.info) for j, - a in enumerate(axes)] + self.index_axes = [index_axes_map[a].set_pos( + j).update_info(self.info) for j, + a in enumerate(axes)] j = len(self.index_axes) # check for column conflicts @@ -2662,17 +2817,18 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, blocks = block_obj._data.blocks if len(self.non_index_axes): axis, axis_labels = self.non_index_axes[0] - data_columns = self.validate_data_columns(data_columns, min_itemsize) + data_columns = self.validate_data_columns( + data_columns, min_itemsize) if len(data_columns): blocks = block_obj.reindex_axis(Index(axis_labels) - Index( - data_columns), axis=axis, copy=False)._data.blocks + data_columns), axis=axis, copy=False)._data.blocks for c in data_columns: blocks.extend(block_obj.reindex_axis( - [c], axis=axis, copy=False)._data.blocks) + [c], axis=axis, copy=False)._data.blocks) # reorder the blocks in the same order as the existing_table if we can if existing_table is not None: - by_items = dict([ (tuple(b.items.tolist()),b) for b in blocks ]) + by_items = dict([(tuple(b.items.tolist()), b) for b in blocks]) new_blocks = [] for ea in existing_table.values_axes: items = tuple(ea.values) @@ -2680,7 +2836,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, b = by_items.pop(items) new_blocks.append(b) except: - raise ValueError("cannot match existing table structure for [%s] on appending data" % items) + raise ValueError( + "cannot match existing table structure for [%s] on appending data" % + items) blocks = new_blocks # add my values @@ -2704,7 +2862,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, existing_col = existing_table.values_axes[i] except: raise ValueError("Incompatible appended table [%s] with existing table [%s]" % - (blocks,existing_table.values_axes)) + (blocks, existing_table.values_axes)) else: existing_col = None @@ -2721,10 +2879,12 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, col.set_pos(j) self.values_axes.append(col) - except (NotImplementedError, 
ValueError, TypeError), e: + except (NotImplementedError, ValueError, TypeError) as e: raise e - except (Exception), detail: - raise Exception("cannot find the correct atom type -> [dtype->%s,items->%s] %s" % (b.dtype.name, b.items, str(detail))) + except (Exception) as detail: + raise Exception( + "cannot find the correct atom type -> [dtype->%s,items->%s] %s" % + (b.dtype.name, b.items, str(detail))) j += 1 # validate our min_itemsize @@ -2755,29 +2915,32 @@ def process_filter(field, filt): # see if the field is the name of an axis if field == axis_name: - takers = op(axis_values,filt) - return obj.ix._getitem_axis(takers,axis=axis_number) + takers = op(axis_values, filt) + return obj.ix._getitem_axis(takers, axis=axis_number) # this might be the name of a field IN an axis elif field in axis_values: # we need to filter on this dimension - values = _ensure_index(getattr(obj,field).values) - filt = _ensure_index(filt) + values = _ensure_index(getattr(obj, field).values) + filt = _ensure_index(filt) # hack until we support reversed dim flags - if isinstance(obj,DataFrame): - axis_number = 1-axis_number - takers = op(values,filt) - return obj.ix._getitem_axis(takers,axis=axis_number) + if isinstance(obj, DataFrame): + axis_number = 1 - axis_number + takers = op(values, filt) + return obj.ix._getitem_axis(takers, axis=axis_number) - raise ValueError("cannot find the field [%s] for filtering!" % field) + raise ValueError( + "cannot find the field [%s] for filtering!" % + field) obj = process_filter(field, filt) return obj - def create_description(self, complib=None, complevel=None, fletcher32=False, expectedrows=None): + def create_description( + self, complib=None, complevel=None, fletcher32=False, expectedrows=None): """ create the description of the table from the axes & values """ # expected rows estimate @@ -2811,10 +2974,15 @@ def read_coordinates(self, where=None, start=None, stop=None, **kwargs): return False # create the selection - self.selection = Selection(self, where=where, start=start, stop=stop, **kwargs) + self.selection = Selection( + self, + where=where, + start=start, + stop=stop, + **kwargs) return Coordinates(self.selection.select_coords(), group=self.group, where=where) - def read_column(self, column, where = None, **kwargs): + def read_column(self, column, where=None, **kwargs): """ return a single column from the table, generally only indexables are interesting """ # validate the version @@ -2825,14 +2993,17 @@ def read_column(self, column, where = None, **kwargs): return False if where is not None: - raise Exception("read_column does not currently accept a where clause") + raise Exception( + "read_column does not currently accept a where clause") # find the axes for a in self.axes: if column == a.name: if not a.is_data_indexable: - raise ValueError("column [%s] can not be extracted individually; it is not data indexable" % column) + raise ValueError( + "column [%s] can not be extracted individually; it is not data indexable" % + column) # column must be an indexable or a data column c = getattr(self.table.cols, column) @@ -2841,7 +3012,9 @@ def read_column(self, column, where = None, **kwargs): raise KeyError("column [%s] not found in the table" % column) + class WORMTable(Table): + """ a write-once read-many table: this format DOES NOT ALLOW appending to a table.
writing is a one-time operation the data are stored in a format that allows for searching the data on disk @@ -2861,6 +3034,7 @@ def write(self, **kwargs): class LegacyTable(Table): + """ an appendable table: allow append/query/delete operations to a (possibly) already existing appendable table this table ALLOWS append (but doesn't require them), and stores the data in a format @@ -2960,6 +3134,7 @@ def read(self, where=None, columns=None, **kwargs): class LegacyFrameTable(LegacyTable): + """ support the legacy frame table """ pandas_kind = u'frame_table' table_type = u'legacy_frame' @@ -2970,12 +3145,14 @@ def read(self, *args, **kwargs): class LegacyPanelTable(LegacyTable): + """ support the legacy panel table """ table_type = u'legacy_panel' obj_type = Panel class AppendableTable(LegacyTable): + """ support the new appendable table formats """ _indexables = None table_type = u'appendable' @@ -3043,7 +3220,8 @@ def write_data(self, chunksize): values = [a.take_data() for a in self.values_axes] # transpose the values so first dimension is last - values = [ v.transpose(np.roll(np.arange(v.ndim),v.ndim-1)) for v in values ] + values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1)) + for v in values] # write the chunks if chunksize is None: @@ -3076,15 +3254,17 @@ def write_data_chunk(self, indexes, mask, search, values): args = list(indexes) args.extend([self.dtype, mask, search, values]) rows = func(*args) - except (Exception), detail: + except (Exception) as detail: raise Exception("cannot create row-data -> %s" % str(detail)) try: if len(rows): self.table.append(rows) self.table.flush() - except (Exception), detail: - raise Exception("tables cannot write this data -> %s" % str(detail)) + except (Exception) as detail: + raise Exception( + "tables cannot write this data -> %s" % + str(detail)) def delete(self, where=None, **kwargs): @@ -3140,6 +3320,7 @@ def delete(self, where=None, **kwargs): class AppendableFrameTable(AppendableTable): + """ support the new appendable table formats """ pandas_kind = u'frame_table' table_type = u'appendable_frame' @@ -3169,10 +3350,10 @@ def read(self, where=None, columns=None, **kwargs): if self.is_transposed: values = a.cvalues index_ = cols - cols_ = Index(index,name=getattr(index,'name',None)) + cols_ = Index(index, name=getattr(index, 'name', None)) else: values = a.cvalues.T - index_ = Index(index,name=getattr(index,'name',None)) + index_ = Index(index, name=getattr(index, 'name', None)) cols_ = cols # if we have a DataIndexableCol, its shape will only be 1 dim @@ -3195,6 +3376,7 @@ def read(self, where=None, columns=None, **kwargs): class GenericTable(AppendableFrameTable): + """ a table that reads/writes the generic pytables table format """ pandas_kind = u'frame_table' table_type = u'generic_table' @@ -3207,17 +3389,19 @@ def pandas_type(self): @property def storable(self): - return getattr(self.group,'table',None) or self.group + return getattr(self.group, 'table', None) or self.group def get_attrs(self): """ retrieve our attributes """ - self.non_index_axes = [] - self.nan_rep = None - self.levels = [] + self.non_index_axes = [] + self.nan_rep = None + self.levels = [] t = self.table - self.index_axes = [ a.infer(t) for a in self.indexables if a.is_an_indexable ] - self.values_axes = [ a.infer(t) for a in self.indexables if not a.is_an_indexable ] - self.data_columns = [ a.name for a in self.values_axes ] + self.index_axes = [a.infer(t) + for a in self.indexables if a.is_an_indexable] + self.values_axes = [a.infer(t) + for a in
self.indexables if not a.is_an_indexable] + self.data_columns = [a.name for a in self.values_axes] @property def indexables(self): @@ -3227,11 +3411,15 @@ def indexables(self): d = self.description # the index column is just a simple index - self._indexables = [ GenericIndexCol(name='index',axis=0) ] + self._indexables = [GenericIndexCol(name='index', axis=0)] for i, n in enumerate(d._v_names): - dc = GenericDataIndexableCol(name = n, pos=i, values = [ n ], version = self.version) + dc = GenericDataIndexableCol( + name=n, + pos=i, + values=[n], + version=self.version) self._indexables.append(dc) return self._indexables @@ -3239,7 +3427,9 @@ def indexables(self): def write(self, **kwargs): raise NotImplementedError("cannot write on a generic table") + class AppendableMultiFrameTable(AppendableFrameTable): + """ a frame with a multi-index """ table_type = u'appendable_multiframe' obj_type = DataFrame @@ -3265,12 +3455,17 @@ def read(self, columns=None, **kwargs): for n in self.levels: if n not in columns: columns.insert(0, n) - df = super(AppendableMultiFrameTable, self).read(columns=columns, **kwargs) + df = super( + AppendableMultiFrameTable, + self).read( + columns=columns, + **kwargs) df.set_index(self.levels, inplace=True) return df class AppendablePanelTable(AppendableTable): + """ support the new appendable table formats """ table_type = u'appendable_panel' ndim = 3 @@ -3288,23 +3483,26 @@ def is_transposed(self): class AppendableNDimTable(AppendablePanelTable): + """ support the new appendable table formats """ table_type = u'appendable_ndim' ndim = 4 obj_type = Panel4D + def _convert_index(index, encoding=None): - index_name = getattr(index,'name',None) + index_name = getattr(index, 'name', None) if isinstance(index, DatetimeIndex): converted = index.asi8 return IndexCol(converted, 'datetime64', _tables().Int64Col(), - freq=getattr(index,'freq',None), tz=getattr(index,'tz',None), + freq=getattr(index, 'freq', None), tz=getattr(index, 'tz', None), index_name=index_name) elif isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() - return IndexCol(index.values, 'integer', atom, freq=getattr(index,'freq',None), - index_name=index_name) + return IndexCol( + index.values, 'integer', atom, freq=getattr(index, 'freq', None), + index_name=index_name) if isinstance(index, MultiIndex): raise Exception('MultiIndex not supported here!') @@ -3316,7 +3514,7 @@ def _convert_index(index, encoding=None): if inferred_type == 'datetime64': converted = values.view('i8') return IndexCol(converted, 'datetime64', _tables().Int64Col(), - freq=getattr(index,'freq',None), tz=getattr(index,'tz',None), + freq=getattr(index, 'freq', None), tz=getattr(index, 'tz', None), index_name=index_name) elif inferred_type == 'datetime': converted = np.array([(time.mktime(v.timetuple()) + @@ -3335,8 +3533,9 @@ def _convert_index(index, encoding=None): converted = _convert_string_array(values, encoding) itemsize = converted.dtype.itemsize - return IndexCol(converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize, - index_name=index_name) + return IndexCol( + converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize, + index_name=index_name) elif inferred_type == 'unicode': atom = _tables().ObjectAtom() return IndexCol(np.asarray(values, dtype='O'), 'object', atom, @@ -3355,6 +3554,7 @@ def _convert_index(index, encoding=None): return IndexCol(np.asarray(values, dtype='O'), 'object', atom, index_name=index_name) + def _unconvert_index(data, kind, encoding=None): kind =
_ensure_decoded(kind) if kind == u'datetime64': @@ -3374,6 +3574,7 @@ def _unconvert_index(data, kind, encoding=None): raise ValueError('unrecognized index type %s' % kind) return index + def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): kind = _ensure_decoded(kind) if kind == u'datetime': @@ -3386,6 +3587,7 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): raise ValueError('unrecognized index type %s' % kind) return index + def _convert_string_array(data, encoding, itemsize=None): # encode if needed @@ -3397,19 +3599,20 @@ def _convert_string_array(data, encoding, itemsize=None): if itemsize is None: itemsize = lib.max_len_string_array(com._ensure_object(data.ravel())) - data = np.array(data,dtype="S%d" % itemsize) + data = np.array(data, dtype="S%d" % itemsize) return data + def _unconvert_string_array(data, nan_rep=None, encoding=None): """ deserialize a string array, possibly decoding """ shape = data.shape - data = np.array(data.ravel(),dtype=object) + data = np.array(data.ravel(), dtype=object) # guard against a None encoding in PY3 (because of a legacy # where the passed encoding is actually None) encoding = _ensure_encoding(encoding) if encoding is not None and len(data): - f = np.vectorize(lambda x: x.decode(encoding),otypes=[np.object]) + f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object]) data = f(data) if nan_rep is None: @@ -3418,6 +3621,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None): data = lib.string_array_replace_from_nan_rep(data, nan_rep) return data.reshape(shape) + def _maybe_convert(values, val_kind, encoding): if _need_convert(val_kind): conv = _get_converter(val_kind, encoding) @@ -3425,6 +3629,7 @@ def _maybe_convert(values, val_kind, encoding): values = conv(values) return values + def _get_converter(kind, encoding): kind = _ensure_decoded(kind) if kind == 'datetime64': @@ -3432,17 +3637,20 @@ def _get_converter(kind, encoding): elif kind == 'datetime': return lib.convert_timestamps elif kind == 'string': - return lambda x: _unconvert_string_array(x,encoding=encoding) + return lambda x: _unconvert_string_array(x, encoding=encoding) else: # pragma: no cover raise ValueError('invalid kind %s' % kind) + def _need_convert(kind): kind = _ensure_decoded(kind) if kind in (u'datetime', u'datetime64', u'string'): return True return False + class Term(StringMixin): + """create a term object that holds a field, op, and value Parameters @@ -3470,10 +3678,13 @@ class Term(StringMixin): """ _ops = ['<=', '<', '>=', '>', '!=', '==', '='] - _search = re.compile("^\s*(?P<field>\w+)\s*(?P<op>%s)\s*(?P<value>.+)\s*$" % '|'.join(_ops)) + _search = re.compile( + "^\s*(?P<field>\w+)\s*(?P<op>%s)\s*(?P<value>.+)\s*$" % + '|'.join(_ops)) _max_selectors = 31 - def __init__(self, field, op=None, value=None, queryables=None, encoding=None): + def __init__(self, field, op=None, + value=None, queryables=None, encoding=None): self.field = None self.op = None self.value = None @@ -3538,8 +3749,10 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): # we have valid conditions if self.op in ['>', '>=', '<', '<=']: - if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,basestring): - raise ValueError("an inequality condition cannot have multiple values [%s]" % str(self)) + if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value, basestring): + raise ValueError( + "an inequality condition cannot have multiple values [%s]" % + str(self)) if not
is_list_like(self.value): self.value = [self.value] @@ -3581,7 +3794,7 @@ def eval(self): if self.is_in_table: values = [self.convert_value(v) for v in self.value] else: - values = [TermValue(v,v,self.kind) for v in self.value] + values = [TermValue(v, v, self.kind) for v in self.value] # equality conditions if self.op in ['==', '!=']: @@ -3592,21 +3805,26 @@ def eval(self): else: filter_op = lambda axis, vals: axis.isin(vals) - if self.is_in_table: # too many values to create the expression? if len(values) <= self._max_selectors: - vs = [ self.generate(v) for v in values ] + vs = [self.generate(v) for v in values] self.condition = "(%s)" % ' | '.join(vs) # use a filter after reading else: - self.filter = (self.field, filter_op, Index([v.value for v in values])) + self.filter = ( + self.field, + filter_op, + Index([v.value for v in values])) else: - self.filter = (self.field, filter_op, Index([v.value for v in values])) + self.filter = ( + self.field, + filter_op, + Index([v.value for v in values])) else: @@ -3616,7 +3834,9 @@ def eval(self): else: - raise TypeError("passing a filterable condition to a non-table indexer [%s]" % str(self)) + raise TypeError( + "passing a filterable condition to a non-table indexer [%s]" % + str(self)) def convert_value(self, v): """ convert the expression that is in the term to something that is accepted by pytables """ @@ -3628,34 +3848,37 @@ def stringify(value): return value kind = _ensure_decoded(self.kind) - if kind == u'datetime64' or kind == u'datetime' : + if kind == u'datetime64' or kind == u'datetime': v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') - return TermValue(v,v.value,kind) + return TermValue(v, v.value, kind) elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': v = time.mktime(v.timetuple()) - return TermValue(v,Timestamp(v),kind) + return TermValue(v, Timestamp(v), kind) elif kind == u'integer': v = int(float(v)) - return TermValue(v,v,kind) + return TermValue(v, v, kind) elif kind == u'float': v = float(v) - return TermValue(v,v,kind) + return TermValue(v, v, kind) elif kind == u'bool': if isinstance(v, basestring): - v = not v.strip().lower() in [u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] + v = not v.strip().lower() in [ + u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] else: v = bool(v) - return TermValue(v,v,kind) + return TermValue(v, v, kind) elif not isinstance(v, basestring): v = stringify(v) - return TermValue(v,stringify(v),u'string') + return TermValue(v, stringify(v), u'string') # string quoting - return TermValue(v,stringify(v),u'string') + return TermValue(v, stringify(v), u'string') + class TermValue(object): + """ hold a term value that we use to construct a condition/filter """ def __init__(self, value, converted, kind): @@ -3672,7 +3895,9 @@ def tostring(self, encoding): return '"%s"' % self.converted return self.converted + class Coordinates(object): + """ holds a returned coordinates list, useful to select the same rows from different tables coordinates : holds the array of coordinates @@ -3692,7 +3917,9 @@ def __getitem__(self, key): """ return a new coordinates object, sliced by the key """ return Coordinates(self.values[key], self.group, self.where) + class Selection(object): + """ Carries out a selection operation on a tables.Table object.
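Term, defined above, splits a query string of the form field op value with its _search regex; because the alternation lists longer operators first, '<=' is tried before '<'. A small illustrative sketch of that match, assuming the same operator list as _ops:

import re

ops = ['<=', '<', '>=', '>', '!=', '==', '=']
# same shape as Term._search: named groups for the field, operator, and value
search = re.compile(r"^\s*(?P<field>\w+)\s*(?P<op>%s)\s*(?P<value>.+)\s*$" % '|'.join(ops))
m = search.match('index <= 20121114')
assert m.groupdict() == {'field': 'index', 'op': '<=', 'value': '20121114'}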
@@ -3703,6 +3930,7 @@ class Selection(object): start, stop: indices to start and/or stop selection """ + def __init__(self, table, where=None, start=None, stop=None, **kwargs): self.table = table self.where = where @@ -3720,9 +3948,10 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): # create the numexpr & the filter if self.terms: - terms = [ t for t in self.terms if t.condition is not None ] + terms = [t for t in self.terms if t.condition is not None] if len(terms): - self.condition = "(%s)" % ' & '.join([ t.condition for t in terms ]) + self.condition = "(%s)" % ' & '.join( + [t.condition for t in terms]) self.filter = [] for t in self.terms: if t.filter is not None: @@ -3767,13 +3996,13 @@ def select_coords(self): return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True) -### utilities ### +# utilities ### -def timeit(key,df,fn=None,remove=True,**kwargs): +def timeit(key, df, fn=None, remove=True, **kwargs): if fn is None: fn = 'timeit.h5' - store = HDFStore(fn,mode='w') - store.append(key,df,**kwargs) + store = HDFStore(fn, mode='w') + store.append(key, df, **kwargs) store.close() if remove: From c665a85b6f7422403acf684d086141d0d701f952 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:32:24 -0400 Subject: [PATCH 26/48] DOC: reference future enhancingperf.eval section --- pandas/computation/eval.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/computation/eval.py b/pandas/computation/eval.py index e08e0f28d7877..1a681e37d6130 --- a/pandas/computation/eval.py +++ b/pandas/computation/eval.py @@ -48,8 +48,10 @@ def eval(expr, engine='numexpr', truediv=True, local_dict=None, Notes ----- - The benefits of using ``eval`` are that very large frames that are terms in - long expressions are sped up, sometimes by as much as 10x. + * The benefits of using ``eval`` are that very large frames that are terms in + long expressions are sped up, sometimes by as much as 10x. + + See :ref:`Enhancing performance <enhancingperf.eval>` for more details. """ # make sure we're passed a valid engine if not engine in _engines: From cb27934a41ebcd1085ac08b587f44202103c3413 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:33:45 -0400 Subject: [PATCH 27/48] CLN/DOC: clean up docstrings in pytables --- pandas/io/pytables.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 013e596320250..1cb465cbdf16a --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -167,9 +167,12 @@ def get_store(path, mode='a', complevel=None, complib=None, Examples -------- + >>> from pandas import DataFrame + >>> from numpy.random import randn + >>> bar = DataFrame(randn(10, 4)) >>> with get_store('test.h5') as store: - >>> store['foo'] = bar # write to HDF5 - >>> bar = store['foo'] # retrieve + ... store['foo'] = bar # write to HDF5 + ...
bar = store['foo'] # retrieve """ store = None try: @@ -262,6 +265,9 @@ class HDFStore(object): Examples -------- + >>> from pandas import DataFrame + >>> from numpy.random import randn + >>> bar = DataFrame(randn(10, 4)) >>> store = HDFStore('test.h5') >>> store['foo'] = bar # write to HDF5 >>> bar = store['foo'] # retrieve From 63ba37d0943607f679fd9d3b4715ba38e8ae9739 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:34:20 -0400 Subject: [PATCH 28/48] CLN: actually pass fletcher32 in get_store --- pandas/io/pytables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1cb465cbdf16a..21da0d58b67f7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -177,7 +177,7 @@ def get_store(path, mode='a', complevel=None, complib=None, store = None try: store = HDFStore(path, mode=mode, complevel=complevel, - complib=complib, fletcher32=False) + complib=complib, fletcher32=fletcher32) yield store finally: if store is not None: From dcde5901f7975c4aac046d4f3c0b7c6629bc4f15 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:35:32 -0400 Subject: [PATCH 29/48] CLN: remove unused variables --- pandas/io/pytables.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 21da0d58b67f7..2ac4e19a7eb7b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -278,7 +278,7 @@ class HDFStore(object): def __init__(self, path, mode=None, complevel=None, complib=None, fletcher32=False): try: - import tables as _ + import tables except ImportError: # pragma: no cover raise Exception('HDFStore requires PyTables') @@ -576,7 +576,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, start=start, stop=stop) nrows = len(c) - except (Exception) as detail: + except Exception: raise ValueError("invalid selector [%s]" % selector) def func(_start, _stop): @@ -1235,7 +1235,6 @@ def validate_col(self, itemsize=None): """ validate this column: return the compared against itemsize """ # validate this column for string truncation (or reset to the max size) - dtype = getattr(self, 'dtype', None) if _ensure_decoded(self.kind) == u'string': c = self.col @@ -2252,7 +2251,6 @@ def read(self, **kwargs): sdict = {} for name in items: key = 'sparse_frame_%s' % name - node = getattr(self.group, key) s = SparseFrameStorer(self.parent, getattr(self.group, key)) s.infer_axes() sdict[name] = s.read() @@ -2592,7 +2590,6 @@ def indexables(self): """ create/cache the indexables if they don't exist """ if self._indexables is None: - d = self.description self._indexables = [] # index columns From 3c4e2b3fa40df21ba477693ce647542f156f1e92 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 14:41:02 -0400 Subject: [PATCH 30/48] CLN: more pep8 and get rid of most raise Exception clauses --- pandas/io/pytables.py | 83 ++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2ac4e19a7eb7b..0f84884d51340 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -280,7 +280,7 @@ def __init__(self, path, mode=None, complevel=None, complib=None, try: import tables except ImportError: # pragma: no cover - raise Exception('HDFStore requires PyTables') + raise ImportError('HDFStore requires PyTables') self._path = path if mode is None: @@ -516,7 +516,8 @@ def select_column(self, key, column, **kwargs): return 
self.get_storer(key).read_column(column=column, **kwargs) def select_as_multiple(self, keys, where=None, selector=None, columns=None, - start=None, stop=None, iterator=False, chunksize=None, auto_close=False, **kwargs): + start=None, stop=None, iterator=False, + chunksize=None, auto_close=False, **kwargs): """ Retrieve pandas objects from multiple tables Parameters @@ -538,13 +539,15 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, if isinstance(keys, (list, tuple)) and len(keys) == 1: keys = keys[0] if isinstance(keys, basestring): - return self.select(key=keys, where=where, columns=columns, start=start, stop=stop, iterator=iterator, chunksize=chunksize, **kwargs) + return self.select(key=keys, where=where, columns=columns, + start=start, stop=stop, iterator=iterator, + chunksize=chunksize, **kwargs) if not isinstance(keys, (list, tuple)): - raise Exception("keys must be a list/tuple") + raise TypeError("keys must be a list/tuple") - if len(keys) == 0: - raise Exception("keys must have a non-zero length") + if not len(keys): + raise ValueError("keys must have a non-zero length") if selector is None: selector = keys[0] @@ -686,13 +689,13 @@ def append(self, key, value, columns=None, **kwargs): data in the table, so be careful """ if columns is not None: - raise Exception( - "columns is not a supported keyword in append, try data_columns") + raise TypeError("columns is not a supported keyword in append, " + "try data_columns") self._write_to_group(key, value, table=True, append=True, **kwargs) - def append_to_multiple( - self, d, value, selector, data_columns=None, axes=None, **kwargs): + def append_to_multiple(self, d, value, selector, data_columns=None, + axes=None, **kwargs): """ Append to multiple tables Parameters @@ -711,8 +714,9 @@ def append_to_multiple( """ if axes is not None: - raise Exception( - "axes is currently not accepted as a paremter to append_to_multiple; you can create the tables indepdently instead") + raise TypeError("axes is currently not accepted as a parameter to" + " append_to_multiple; you can create the " + "tables independently instead") if not isinstance(d, dict): raise ValueError( @@ -770,7 +774,7 @@ def create_table_index(self, key, **kwargs): # version requirements _tables() if not _table_supports_index: - raise Exception("PyTables >= 2.3 is required for table indexing") + raise ValueError("PyTables >= 2.3 is required for table indexing") s = self.get_storer(key) if s is None: @@ -1005,8 +1009,8 @@ class TableIterator(object): kwargs : the passed kwargs """ - def __init__(self, store, func, nrows, start=None, - stop=None, chunksize=None, auto_close=False): + def __init__(self, store, func, nrows, start=None, stop=None, + chunksize=None, auto_close=False): self.store = store self.func = func self.nrows = nrows or 0 @@ -1928,7 +1932,7 @@ def read_index(self, key): _, index = self.read_index_node(getattr(self.group, key)) return index else: # pragma: no cover - raise Exception('unrecognized index variety: %s' % variety) + raise TypeError('unrecognized index variety: %s' % variety) def write_index(self, key, index): if isinstance(index, MultiIndex): @@ -2448,7 +2452,7 @@ def validate(self, other): (c, sax, oax)) # should never get here - raise Exception( + raise ValueError( "invalid combination of [%s] on appending data [%s] vs current table [%s]" % (c, sv, ov)) @@ -2884,10 +2888,11 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, self.values_axes.append(col) except (NotImplementedError, ValueError, TypeError) as e: raise e -
except (Exception) as detail: - raise Exception( - "cannot find the correct atom type -> [dtype->%s,items->%s] %s" % - (b.dtype.name, b.items, str(detail))) + except Exception as detail: + raise TypeError("cannot find the correct atom type -> " + "[dtype->%s,items->%s] %s" % (b.dtype.name, + b.items, + str(detail))) j += 1 # validate our min_itemsize @@ -2996,8 +3001,8 @@ def read_column(self, column, where=None, **kwargs): return False if where is not None: - raise Exception( - "read_column does not currently accept a where clause") + raise TypeError("read_column does not currently accept a where " + "clause") # find the axes for a in self.axes: @@ -3052,7 +3057,7 @@ class LegacyTable(Table): ndim = 3 def write(self, **kwargs): - raise Exception("write operations are not allowed on legacy tables!") + raise TypeError("write operations are not allowed on legacy tables!") def read(self, where=None, columns=None, **kwargs): """ we have n indexable columns, with an arbitrary number of data axes """ @@ -3257,17 +3262,15 @@ def write_data_chunk(self, indexes, mask, search, values): args = list(indexes) args.extend([self.dtype, mask, search, values]) rows = func(*args) - except (Exception) as detail: - raise Exception("cannot create row-data -> %s" % str(detail)) + except Exception as detail: + raise Exception("cannot create row-data -> %s" % detail) try: if len(rows): self.table.append(rows) self.table.flush() - except (Exception) as detail: - raise Exception( - "tables cannot write this data -> %s" % - str(detail)) + except Exception as detail: + raise TypeError("tables cannot write this data -> %s" % detail) def delete(self, where=None, **kwargs): @@ -3499,16 +3502,15 @@ def _convert_index(index, encoding=None): if isinstance(index, DatetimeIndex): converted = index.asi8 return IndexCol(converted, 'datetime64', _tables().Int64Col(), - freq=getattr(index, 'freq', None), tz=getattr(index, 'tz', None), - index_name=index_name) + freq=getattr(index, 'freq', None), + tz=getattr(index, 'tz', None), index_name=index_name) elif isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() - return IndexCol( - index.values, 'integer', atom, freq=getattr(index, 'freq', None), - index_name=index_name) + return IndexCol(index.values, 'integer', atom, freq=getattr(index, + 'freq', None), index_name=index_name) if isinstance(index, MultiIndex): - raise Exception('MultiIndex not supported here!') + raise TypeError('MultiIndex not supported here!') inferred_type = lib.infer_dtype(index) @@ -3517,8 +3519,8 @@ def _convert_index(index, encoding=None): if inferred_type == 'datetime64': converted = values.view('i8') return IndexCol(converted, 'datetime64', _tables().Int64Col(), - freq=getattr(index, 'freq', None), tz=getattr(index, 'tz', None), - index_name=index_name) + freq=getattr(index, 'freq', None), + tz=getattr(index, 'tz', None), index_name=index_name) elif inferred_type == 'datetime': converted = np.array([(time.mktime(v.timetuple()) + v.microsecond / 1E6) for v in values], @@ -3536,9 +3538,8 @@ def _convert_index(index, encoding=None): converted = _convert_string_array(values, encoding) itemsize = converted.dtype.itemsize - return IndexCol( - converted, 'string', _tables().StringCol(itemsize), itemsize=itemsize, - index_name=index_name) + return IndexCol(converted, 'string', _tables().StringCol(itemsize), + itemsize=itemsize, index_name=index_name) elif inferred_type == 'unicode': atom = _tables().ObjectAtom() return IndexCol(np.asarray(values, dtype='O'), 'object', atom, From 
226c7869742582cf62af604dcab2237cfe1750c4 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 4 Jul 2013 18:52:36 -0400 Subject: [PATCH 31/48] CLN: change NameError to match python --- pandas/computation/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 24000b27a033a..a35d80568b482 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -31,7 +31,7 @@ def _resolve_name(env, key): if not isinstance(key, basestring): return key - raise NameError('{0!r} is undefined'.format(key)) + raise NameError('name {0!r} is not defined'.format(key)) return res From 79871d8b9c24d7b52d8ab86897b43c2a5481c89d Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 5 Jul 2013 11:26:31 -0400 Subject: [PATCH 32/48] API: expose the Expr object to top level pandas --- pandas/__init__.py | 2 +- pandas/computation/api.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index bec0877b13bb8..5315fd770e796 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -29,7 +29,7 @@ from pandas.stats.api import * from pandas.tseries.api import * from pandas.io.api import * -from pandas.computation.api import eval +from pandas.computation.api import * from pandas.util.testing import debug diff --git a/pandas/computation/api.py b/pandas/computation/api.py index 86f72902a52c8..db8269a497768 100644 --- a/pandas/computation/api.py +++ b/pandas/computation/api.py @@ -1 +1,2 @@ from pandas.computation.eval import eval +from pandas.computation.expr import Expr From 84fdb453fb497ec73ae70cd059840d3b087fa828 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 5 Jul 2013 11:27:02 -0400 Subject: [PATCH 33/48] CLN/TST: fail with a NotImplementedError on and or not --- pandas/computation/expr.py | 4 ++++ pandas/computation/tests/test_eval.py | 33 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 60fea6e935070..666eb891f9929 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -77,6 +77,8 @@ def visit_BinOp(self, node): return op(left, right) def visit_UnaryOp(self, node): + if isinstance(node.op, ast.Not): + raise NotImplementedError("not operator not yet supported") op = self.visit(node.op) return op(self.visit(node.operand)) @@ -107,6 +109,8 @@ def visit_Call(self, node): def visit_Attribute(self, node): raise NotImplementedError("attribute access is not yet supported") + def visit_BoolOp(self, node): + raise NotImplementedError("boolean operators are not yet supported") class Expr(StringMixin): """Expr object""" diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 0a1356915523a..8e185f5b9772b 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -610,6 +610,39 @@ def test_is_expr(): check_is_expr(engine) +def check_not_fails(engine): + x = True + assert_raises(NotImplementedError, pd.eval, 'not x', engine=engine, + local_dict={'x': x}) + + +def test_not_fails(): + for engine in _engines: + check_not_fails(engine) + + +def check_and_fails(engine): + x, y = False, True + assert_raises(NotImplementedError, pd.eval, 'x and y', engine=engine, + local_dict={'x': x, 'y': y}) + + +def test_and_fails(): + for engine in _engines: + check_and_fails(engine) + + +def check_or_fails(engine): + x, y = True, False + assert_raises(NotImplementedError, pd.eval, 'x or y', engine=engine, + 
local_dict={'x': x, 'y': y}) + + +def test_or_fails(): for engine in _engines: check_or_fails(engine) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) From 4d9f9a7805efa54f08af7719207703e7722bb59d Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 5 Jul 2013 11:27:17 -0400 Subject: [PATCH 34/48] CLN: generalize operator/expression printing --- pandas/computation/ops.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index a35d80568b482..0d67c56ba472a --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -49,13 +49,7 @@ def _update_name(env, key, value): raise NameError('{0!r} is undefined'.format(key)) -class NamedObjectMixin(object): - @property - def typename(self): - return com.pprint_thing(self.__class__.__name__) - - -class Term(StringMixin, NamedObjectMixin): +class Term(StringMixin): def __init__(self, name, env): self.name = name self.value = _resolve_name(env, name) @@ -79,7 +73,11 @@ def __init__(self, value, env): super(Constant, self).__init__(value, env) -class Op(NamedObjectMixin, StringMixin): +def _print_operand(opr): + return opr.name if is_term(opr) else unicode(opr) + + +class Op(StringMixin): """Hold an operator of unknown arity """ def __init__(self, op, operands): @@ -90,12 +88,11 @@ def __iter__(self): return iter(self.operands) def __unicode__(self): - op = 'op={1!r}'.format(self.op) - operands = ', '.join('opr_{i}={opr}'.format(i=i, opr=opr) - for i, opr in enumerate(self.operands)) - return com.pprint_thing('{0}({op}, ' - '{operands})'.format(self.name, op=op, - operands=operands)) + """Print a generic n-ary operator and its operands""" + # recurse over the operands + parened = ('({0})'.format(_print_operand(opr)) + for opr in self.operands) + return com.pprint_thing(' {0} '.format(self.op).join(parened)) _cmp_ops_syms = '>', '<', '>=', '<=', '==', '!=' _cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne @@ -161,10 +158,6 @@ def __init__(self, op, lhs, rhs): raise BinaryOperatorError('Invalid binary operator {0}, valid' ' operators are {1}'.format(op, keys)) - def __unicode__(self): - return com.pprint_thing('({0}) {1} ({2})'.format(self.lhs, self.op, - self.rhs)) - def __call__(self, env): # handle truediv if self.op == '/' and env.locals['truediv']: From a0d2ce0f458f18d43e87e9971d2625457e7c1814 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Fri, 5 Jul 2013 11:34:20 -0400 Subject: [PATCH 35/48] CLN: clean up testing and expr --- pandas/computation/expr.py | 1 + pandas/computation/tests/test_eval.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 666eb891f9929..6d33f6ac50a0d --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -112,6 +112,7 @@ def visit_Attribute(self, node): def visit_BoolOp(self, node): raise NotImplementedError("boolean operators are not yet supported") + class Expr(StringMixin): """Expr object""" def __init__(self, expr, engine='numexpr', env=None, truediv=True): diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 8e185f5b9772b..fc1cccf320201 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -42,7 +42,7 @@ def fractional(x): def hasfractional(x): - return np.any(fractional(x) != 0.0) + return np.any(fractional(x)) def _eval_from_expr(lhs, cmp1, rhs, binop, cmp2): From
317a1530b1e46a61fb4c97388108fdd7e43ece77 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 6 Jul 2013 16:25:20 -0400 Subject: [PATCH 36/48] ENH: add modest type inference --- pandas/computation/align.py | 17 +++++++++-------- pandas/computation/engines.py | 3 ++- pandas/computation/ops.py | 31 +++++++++++++++++++++++++------ 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/pandas/computation/align.py b/pandas/computation/align.py index f2bf11d41e185..529fe84fd06a7 100644 --- a/pandas/computation/align.py +++ b/pandas/computation/align.py @@ -163,6 +163,7 @@ def _filter_terms(flat): def _align(terms, env): + # flatten the parse tree (a nested list) terms = list(flatten(terms)) @@ -181,7 +182,7 @@ def _align(terms, env): return typ, axes -def _reconstruct_object(typ, obj, axes): +def _reconstruct_object(typ, obj, axes, dtype): """Reconstruct an object given its type, raw value, and possibly empty (None) axes. @@ -200,20 +201,20 @@ def _reconstruct_object(typ, obj, axes): An object of type ``typ`` with the value `obj` and possible axes `axes`. """ + #import ipdb; ipdb.set_trace() try: - # handle numpy dtypes typ = typ.type except AttributeError: pass if (not isinstance(typ, partial) and issubclass(typ, pd.core.generic.PandasObject)): - return typ(obj, **axes) + return typ(obj, dtype=dtype, **axes) - ret_value = typ(obj) + ret_value = typ(obj).astype(dtype) try: - return ret_value.item() - except (AttributeError, ValueError): - return ret_value - + ret = ret_value.item() + except ValueError: + ret = ret_value + return ret diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index db6beb87da3a5..7f500dccb825b 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -25,7 +25,8 @@ def evaluate(self): self.expr.env) res = self._evaluate(self.expr.env) - return _reconstruct_object(self.result_type, res, self.aligned_axes) + return _reconstruct_object(self.result_type, res, self.aligned_axes, + self.expr.terms.return_type) @property def _is_aligned(self): diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 0d67c56ba472a..ca5f6d4872a72 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -4,6 +4,7 @@ from pandas.util.py3compat import PY3 import pandas.core.common as com from pandas.core.base import StringMixin +from pandas.computation.common import flatten _reductions = 'sum', 'prod' @@ -46,15 +47,25 @@ def _update_name(env, key, value): del env.globals[key] env.globals[key] = value except KeyError: - raise NameError('{0!r} is undefined'.format(key)) + raise NameError('name {0!r} is not defined'.format(key)) class Term(StringMixin): def __init__(self, name, env): self.name = name - self.value = _resolve_name(env, name) self.env = env - self.type = type(self.value) + self.value = _resolve_name(self.env, self.name) + + try: + # ndframe potentially very slow for large, mixed dtype frames + self.type = self.value.values.dtype + except AttributeError: + try: + # ndarray + self.type = self.value.dtype + except AttributeError: + # scalar + self.type = type(self.value) def __unicode__(self): return com.pprint_thing(self.name) @@ -88,15 +99,23 @@ def __iter__(self): return iter(self.operands) def __unicode__(self): - """Print a generic n-ary operator and its operands""" + """Print a generic n-ary operator and its operands using infix + notation""" # recurse over the operands parened = ('({0})'.format(_print_operand(opr)) for opr in self.operands) return com.pprint_thing(' {0} 
'.format(self.op).join(parened)) + @property + def return_type(self): + # clobber types to bool if the op is a boolean operator + if self.op in (_cmp_ops_syms + _bool_ops_syms): + return np.bool_ + return np.result_type(*(term.type for term in flatten(self))) + -_cmp_ops_syms = '>', '<', '>=', '<=', '==', '!=' -_cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne +_cmp_ops_syms = '>', '<', '>=', '<=', '==', '!=', '=' +_cmp_ops_funcs = op.gt, op.lt, op.ge, op.le, op.eq, op.ne, op.eq _cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs)) _bool_ops_syms = '&', '|' From 401bc288cc7e145a8a4076376204d59793e94b02 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Sat, 6 Jul 2013 16:25:58 -0400 Subject: [PATCH 37/48] ENH: rewrite assignment as equal comparison --- pandas/computation/expr.py | 47 ++++++++++++++++++++++++--- pandas/computation/tests/test_eval.py | 4 ++- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 6d33f6ac50a0d..9a9cd226278bc 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -1,7 +1,12 @@ import ast import sys +import itertools +import tokenize +import re +from cStringIO import StringIO from functools import partial + from pandas.core.base import StringMixin from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops from pandas.computation.ops import _cmp_ops_syms, _bool_ops_syms @@ -26,12 +31,38 @@ class ExprParserError(Exception): pass +def _rewrite_assign(source): + res = [] + g = tokenize.generate_tokens(StringIO(source).readline) + for toknum, tokval, _, _, _ in g: + res.append((toknum, '==' if tokval == '=' else tokval)) + return tokenize.untokenize(res) + + +def _parenthesize_booleans(source, ops='|&'): + res = source + for op in ops: + terms = res.split(op) + + t = [] + for term in terms: + t.append('({0})'.format(term)) + + res = op.join(t) + return res + + +def preparse(source): + return _parenthesize_booleans(_rewrite_assign(source)) + + class ExprVisitor(ast.NodeVisitor): """Custom ast walker """ bin_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms - bin_op_nodes = ('Gt', 'Lt', 'GtE', 'LtE', 'Eq', 'NotEq', 'BitAnd', 'BitOr', - 'Add', 'Sub', 'Mult', 'Div', 'Pow', 'FloorDiv', 'Mod') + bin_op_nodes = ('Gt', 'Lt', 'GtE', 'LtE', 'Eq', 'NotEq', None, + 'BitAnd', 'BitOr', 'Add', 'Sub', 'Mult', 'Div', 'Pow', + 'FloorDiv', 'Mod') bin_op_nodes_map = dict(zip(bin_ops, bin_op_nodes)) unary_ops = _unary_ops_syms @@ -39,7 +70,7 @@ class ExprVisitor(ast.NodeVisitor): unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes)) def __init__(self, env): - for bin_op in self.bin_ops: + for bin_op in itertools.ifilter(lambda x: x is not None, self.bin_ops): setattr(self, 'visit_{0}'.format(self.bin_op_nodes_map[bin_op]), lambda node, bin_op=bin_op: partial(BinOp, bin_op)) @@ -54,7 +85,7 @@ def visit(self, node): raise TypeError('"node" must be an AST node or a string, you' ' passed a(n) {0}'.format(node.__class__)) if isinstance(node, basestring): - node = ast.fix_missing_locations(ast.parse(node)) + node = ast.fix_missing_locations(ast.parse(preparse(node))) return super(ExprVisitor, self).visit(node) def visit_Module(self, node): @@ -62,7 +93,7 @@ def visit_Module(self, node): raise ExprParserError('only a single expression is allowed') expr = node.body[0] - if not isinstance(expr, ast.Expr): + if not isinstance(expr, (ast.Expr, ast.Assign)): raise SyntaxError('only expressions are allowed') return self.visit(expr) @@ -95,6 +126,12 @@ def visit_Compare(self, 
node): raise ExprParserError('chained comparisons not supported') return self.visit(ops[0])(self.visit(node.left), self.visit(comps[0])) + def visit_Assign(self, node): + cmpr = ast.copy_location(ast.Compare(ops=[ast.Eq()], + left=node.targets[0], + comparators=[node.value]), node) + return self.visit(cmpr) + def visit_Call(self, node): if not isinstance(node.func, ast.Name): raise TypeError("Only named functions are supported") diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index fc1cccf320201..6ec630b80614d 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -52,7 +52,9 @@ def _eval_from_expr(lhs, cmp1, rhs, binop, cmp2): env = Scope() typ, axes = _align_core((Term('lhs', env), Term('rhs', env))) lhs, rhs = env.locals['lhs'], env.locals['rhs'] - return _reconstruct_object(typ, bf(f1(lhs, rhs), f2(lhs, rhs)), axes) + ret_type = np.result_type(lhs, rhs) + return _reconstruct_object(typ, bf(f1(lhs, rhs), f2(lhs, rhs)), axes, + ret_type) def _eval_single_bin(lhs, cmp1, rhs, has_neg_frac): From 22dedcb87392276c057882c5e1c5e71f06c2e83d Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 6 Jul 2013 15:52:29 -0400 Subject: [PATCH 38/48] ENH: initial commit for adding Expr based terms for pytables support --- pandas/computation/engines.py | 3 +- pandas/computation/expr.py | 51 +++--- pandas/computation/ops.py | 56 +++--- pandas/computation/pytables.py | 281 +++++++++++++++++++++++++++++++ pandas/io/pytables.py | 276 +----------------------------- pandas/io/tests/test_pytables.py | 39 +++++ 6 files changed, 389 insertions(+), 317 deletions(-) create mode 100644 pandas/computation/pytables.py diff --git a/pandas/computation/engines.py b/pandas/computation/engines.py index 7f500dccb825b..c28e88bdef887 100644 --- a/pandas/computation/engines.py +++ b/pandas/computation/engines.py @@ -76,5 +76,4 @@ def evaluate(self): def _evaluate(self, env): pass - -_engines = {'numexpr': NumExprEngine, 'python': PythonEngine} +_engines = {'numexpr': NumExprEngine, 'python': PythonEngine } diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 9a9cd226278bc..10ca7e1083983 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -13,7 +13,6 @@ from pandas.computation.ops import _arith_ops_syms, _unary_ops_syms from pandas.computation.ops import Term, Constant - class Scope(object): __slots__ = 'globals', 'locals' @@ -26,7 +25,6 @@ def __init__(self, gbls=None, lcls=None, frame_level=1): finally: del frame - class ExprParserError(Exception): pass @@ -80,15 +78,28 @@ def __init__(self, env): lambda node, unary_op=unary_op: partial(UnaryOp, unary_op)) self.env = env - def visit(self, node): + def generic_visit(self, node, **kwargs): + """Called if no explicit visitor function exists for a node.""" + for field, value in iter_fields(node): + if isinstance(value, list): + for item in value: + if isinstance(item, AST): + self.visit(item, **kwargs) + elif isinstance(value, AST): + self.visit(value, **kwargs) + + def visit(self, node, **kwargs): if not (isinstance(node, ast.AST) or isinstance(node, basestring)): raise TypeError('"node" must be an AST node or a string, you' ' passed a(n) {0}'.format(node.__class__)) if isinstance(node, basestring): node = ast.fix_missing_locations(ast.parse(preparse(node))) - return super(ExprVisitor, self).visit(node) - def visit_Module(self, node): + method = 'visit_' + node.__class__.__name__ + visitor = getattr(self, method, self.generic_visit) + return 
visitor(node, **kwargs) + + def visit_Module(self, node, **kwargs): if len(node.body) != 1: raise ExprParserError('only a single expression is allowed') @@ -96,43 +107,43 @@ def visit_Module(self, node): if not isinstance(expr, (ast.Expr, ast.Assign)): raise SyntaxError('only expressions are allowed') - return self.visit(expr) + return self.visit(expr, **kwargs) - def visit_Expr(self, node): - return self.visit(node.value) + def visit_Expr(self, node, **kwargs): + return self.visit(node.value, **kwargs) - def visit_BinOp(self, node): + def visit_BinOp(self, node, **kwargs): op = self.visit(node.op) - left = self.visit(node.left) - right = self.visit(node.right) + left = self.visit(node.left,side='left') + right = self.visit(node.right,side='right') return op(left, right) - def visit_UnaryOp(self, node): + def visit_UnaryOp(self, node, **kwargs): if isinstance(node.op, ast.Not): raise NotImplementedError("not operator not yet supported") op = self.visit(node.op) return op(self.visit(node.operand)) - def visit_Name(self, node): + def visit_Name(self, node, **kwargs): return Term(node.id, self.env) - def visit_Num(self, node): + def visit_Num(self, node, **kwargs): return Constant(node.n, self.env) - def visit_Compare(self, node): + def visit_Compare(self, node, **kwargs): ops = node.ops comps = node.comparators if len(ops) != 1: raise ExprParserError('chained comparisons not supported') - return self.visit(ops[0])(self.visit(node.left), self.visit(comps[0])) + return self.visit(ops[0])(self.visit(node.left,side='left'), self.visit(comps[0],side='right')) - def visit_Assign(self, node): + def visit_Assign(self, node, **kwargs): cmpr = ast.copy_location(ast.Compare(ops=[ast.Eq()], left=node.targets[0], comparators=[node.value]), node) return self.visit(cmpr) - def visit_Call(self, node): + def visit_Call(self, node, **kwargs): if not isinstance(node.func, ast.Name): raise TypeError("Only named functions are supported") @@ -143,10 +154,10 @@ def visit_Call(self, node): raise NotImplementedError("function calls not yet supported") - def visit_Attribute(self, node): + def visit_Attribute(self, node, **kwargs): raise NotImplementedError("attribute access is not yet supported") - def visit_BoolOp(self, node): + def visit_BoolOp(self, node, **kwargs): raise NotImplementedError("boolean operators are not yet supported") diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index ca5f6d4872a72..76e5497d48175 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -25,36 +25,12 @@ class BinaryOperatorError(OperatorError): pass -def _resolve_name(env, key): - res = env.locals.get(key, env.globals.get(key)) - - if res is None: - if not isinstance(key, basestring): - return key - - raise NameError('name {0!r} is not defined'.format(key)) - - return res - - -def _update_name(env, key, value): - if isinstance(key, basestring): - try: - del env.locals[key] - env.locals[key] = value - except KeyError: - try: - del env.globals[key] - env.globals[key] = value - except KeyError: - raise NameError('name {0!r} is not defined'.format(key)) - - class Term(StringMixin): - def __init__(self, name, env): + def __init__(self, name, env, side=None): self.name = name self.env = env - self.value = _resolve_name(self.env, self.name) + self.side = side + self.value = self._resolve_name() try: # ndframe potentially very slow for large, mixed dtype frames @@ -70,8 +46,32 @@ def __init__(self, name, env): def __unicode__(self): return com.pprint_thing(self.name) + def _resolve_name(self): + env = 
self.env + key = self.name + res = env.locals.get(key, env.globals.get(key)) + + if res is None: + if not isinstance(key, basestring): + return key + + raise NameError('name {0!r} is not defined'.format(key)) + return res + def update(self, value): - _update_name(self.env, self.name, value) + env = self.env + key = self.name + if isinstance(key, basestring): + try: + del env.locals[key] + env.locals[key] = value + except KeyError: + try: + del env.globals[key] + env.globals[key] = value + except KeyError: + raise NameError('{0!r} is undefined'.format(key)) + self.value = value @property diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py new file mode 100644 index 0000000000000..7f070adff7054 --- /dev/null +++ b/pandas/computation/pytables.py @@ -0,0 +1,281 @@ +import sys +import re +import ast +from functools import partial + +from pandas.computation import expr, ops +from pandas.computation.ops import is_term +from pandas.computation.expr import ExprParserError + +class Scope(expr.Scope): + __slots__ = 'globals', 'locals', 'queryables' + + def __init__(self, gbls=None, lcls=None, queryables=None, frame_level=1): + super(Scope, self).__init__(gbls=gbls, lcls=lcls, frame_level=frame_level) + self.queryables = queryables or dict() + +class Term(ops.Term): + + def __init__(self, name, env, side=None): + super(Term, self).__init__(name, env, side=side) + + def _resolve_name(self): + + # must be a queryable + if self.side == 'left': + if self.name not in self.env.queryables: + raise NameError('name {0!r} is not defined'.format(self.name)) + return self.name + + # resolve the rhs (and allow to be None) + return self.env.locals.get(self.name, self.env.globals.get(self.name,self.name)) + +def format_value(q, lhs, v): + """ given a queryable, a lhs name and value, return a formatted value """ + return v + +class BinOp(ops.BinOp): + + def __call__(self, q): + left, right = self.lhs, self.rhs + + # base cases + if is_term(left) and is_term(right): + res = "(%s %s %s)" % (left.value,self.op,format_value(q, left.value, right.value)) + elif not is_term(left) and is_term(right): + res = "(%s %s %s)" % (left(q),self.op,right.value) + elif is_term(left) and not is_term(right): + res = "(%s %s %s)" % (left.value,self.op,right(q)) + elif not (is_term(left) or is_term(right)): + res = "(%s %s %s)" % (left(q),self.op,right(q)) + + return res + +class UnaryOp(ops.UnaryOp): + def __call__(self, q): + operand = self.operand + v = operand.value if is_term(operand) else operand + return "%s (%s)" % (operand,v) + +class ExprVisitor(expr.ExprVisitor): + + bin_ops = '>', '<', '>=', '<=', '==', '!=', '&', '|' + bin_op_nodes = ('Gt', 'Lt', 'GtE', 'LtE', 'Eq', 'NotEq', 'BitAnd', 'BitOr') + bin_op_nodes_map = dict(zip(bin_ops, bin_op_nodes)) + + unary_ops = ['~'] + unary_op_nodes = 'Invert' + unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes)) + + def __init__(self, env): + for bin_op in self.bin_ops: + setattr(self, 'visit_{0}'.format(self.bin_op_nodes_map[bin_op]), + lambda node, bin_op=bin_op: partial(BinOp, bin_op)) + + for unary_op in self.unary_ops: + setattr(self, + 'visit_{0}'.format(self.unary_op_nodes_map[unary_op]), + lambda node, unary_op=unary_op: partial(UnaryOp, unary_op)) + self.env = env + + def visit_Module(self, node, **kwargs): + if len(node.body) != 1: + raise ExprParserError('only a single expression is allowed') + + body = node.body[0] + return self.visit(body) + + def visit_Compare(self, node, **kwargs): + ops = node.ops + comps = node.comparators + for op, comp in 
zip(ops, comps): + node = self.visit(op)(self.visit(node.left,side='left'), self.visit(comp,side='right')) + return node + + def visit_Name(self, node, side=None, **kwargs): + return Term(node.id, self.env, side=side) + +class Expr(expr.Expr): + + """ hold a pytables like expression, comprised of possibly multiple 'terms' + + Parameters + ---------- + field : dict, string term expression, or the field to operate (must be a valid index/column type of DataFrame/Panel) + queryables : a kinds map (dict of column name -> kind), or None i column is non-indexable + encoding : an encoding that will encode the query terms + + Returns + ------- + an Expr object + + Examples + -------- + """ + + _max_selectors = 31 + + def __init__(self, expression, queryables=None, encoding=None): + self.expr = self.pre_parse(expression) + self.env = Scope(queryables=queryables,frame_level=2) + self._visitor = ExprVisitor(self.env) + self.terms = self.parse() + self.encoding = encoding + self.condition = None + self.filter = None + + def pre_parse(self, expression): + """ transform = to == """ + expression = re.sub("=+","==",expression) + return expression + + def evaluate(self): + """ create and return the numexpr condition and filter """ + import pdb; pdb.set_trace() + terms = [] + filter = [] + + self.terms(self.env) + #for t in self.terms: + + terms = [t for t in self.terms if t.condition is not None] + if len(terms): + self.condition = "(%s)" % ' & '.join( + [t.condition for t in terms]) + self.filter = [] + for t in self.terms: + if t.filter is not None: + self.filter.append(t.filter) + + + @property + def is_valid(self): + """ return True if this is a valid field """ + return self.field in self.q + + @property + def is_in_table(self): + """ return True if this is a valid column name for generation (e.g. an actual column in the table) """ + return self.q.get(self.field) is not None + + @property + def kind(self): + """ the kind of my field """ + return self.q.get(self.field) + + def generate(self, v): + """ create and return the op string for this TermValue """ + val = v.tostring(self.encoding) + return "(%s %s %s)" % (self.field, self.op, val) + + """ set the numexpr expression for this term """ + + if not self.is_valid: + raise ValueError("query term is not valid [%s]" % str(self)) + + # convert values if we are in the table + if self.is_in_table: + values = [self.convert_value(v) for v in self.value] + else: + values = [TermValue(v, v, self.kind) for v in self.value] + + # equality conditions + if self.op in ['==', '!=']: + + # our filter op expression + if self.op == '!=': + filter_op = lambda axis, vals: not axis.isin(vals) + else: + filter_op = lambda axis, vals: axis.isin(vals) + + if self.is_in_table: + + # too many values to create the expression? 
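# Note (editorial sketch, not a patch line): the branch just below carries over
# the io/pytables.py behavior: a term whose value list fits within
# _max_selectors (31) is compiled into an OR'd condition string that PyTables
# can evaluate in-kernel via numexpr, while a longer list falls back to a
# post-read filter, presumably because very wide
# "(field == v1) | (field == v2) | ..." strings get expensive. Illustrative
# output only, assuming a hypothetical string-kind field minor_axis with
# values ['A', 'B']:
#     ((minor_axis == "A") | (minor_axis == "B"))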
+ if len(values) <= self._max_selectors: + vs = [self.generate(v) for v in values] + self.condition = "(%s)" % ' | '.join(vs) + + # use a filter after reading + else: + self.filter = ( + self.field, + filter_op, + Index([v.value for v in values])) + + else: + + self.filter = ( + self.field, + filter_op, + Index([v.value for v in values])) + + else: + + if self.is_in_table: + + self.condition = self.generate(values[0]) + + else: + + raise TypeError( + "passing a filterable condition to a non-table indexer [%s]" % + str(self)) + + def convert_value(self, v): + """ convert the expression that is in the term to something that is accepted by pytables """ + + def stringify(value): + value = str(value) + if self.encoding is not None: + value = value.encode(self.encoding) + return value + + kind = _ensure_decoded(self.kind) + if kind == u'datetime64' or kind == u'datetime': + v = lib.Timestamp(v) + if v.tz is not None: + v = v.tz_convert('UTC') + return TermValue(v, v.value, kind) + elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': + v = time.mktime(v.timetuple()) + return TermValue(v, Timestamp(v), kind) + elif kind == u'integer': + v = int(float(v)) + return TermValue(v, v, kind) + elif kind == u'float': + v = float(v) + return TermValue(v, v, kind) + elif kind == u'bool': + if isinstance(v, basestring): + v = not v.strip().lower() in [ + u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] + else: + v = bool(v) + return TermValue(v, v, kind) + elif not isinstance(v, basestring): + v = stringify(v) + return TermValue(v, stringify(v), u'string') + + # string quoting + return TermValue(v, stringify(v), u'string') + + +class TermValue(object): + + """ hold a term value the we use to construct a condition/filter """ + + def __init__(self, value, converted, kind): + self.value = value + self.converted = converted + self.kind = kind + + def tostring(self, encoding): + """ quote the string if not encoded + else encode and return """ + if self.kind == u'string': + if encoding is not None: + return self.converted + return '"%s"' % self.converted + return self.converted + + diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0f84884d51340..5e45cc4d45e3c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -29,6 +29,7 @@ from pandas.tools.merge import concat from pandas.util import py3compat from pandas.io.common import PerformanceWarning +from pandas.computation.pytables import Expr import pandas.lib as lib import pandas.algos as algos @@ -3652,253 +3653,7 @@ def _need_convert(kind): return True return False - -class Term(StringMixin): - - """create a term object that holds a field, op, and value - - Parameters - ---------- - field : dict, string term expression, or the field to operate (must be a valid index/column type of DataFrame/Panel) - op : a valid op (defaults to '=') (optional) - >, >=, <, <=, =, != (not equal) are allowed - value : a value or list of values (required) - queryables : a kinds map (dict of column name -> kind), or None i column is non-indexable - encoding : an encoding that will encode the query terms - - Returns - ------- - a Term object - - Examples - -------- - >>> Term(dict(field = 'index', op = '>', value = '20121114')) - >>> Term('index', '20121114') - >>> Term('index', '>', '20121114') - >>> Term('index', ['20121114','20121114']) - >>> Term('index', datetime(2012,11,14)) - >>> Term('major_axis>20121114') - >>> Term('minor_axis', ['A','U']) - """ - - _ops = ['<=', '<', '>=', '>', '!=', '==', '='] - _search = 
re.compile( - "^\s*(?P\w+)\s*(?P%s)\s*(?P.+)\s*$" % - '|'.join(_ops)) - _max_selectors = 31 - - def __init__(self, field, op=None, - value=None, queryables=None, encoding=None): - self.field = None - self.op = None - self.value = None - self.q = queryables or dict() - self.filter = None - self.condition = None - self.encoding = encoding - - # unpack lists/tuples in field - while(isinstance(field, (tuple, list))): - f = field - field = f[0] - if len(f) > 1: - op = f[1] - if len(f) > 2: - value = f[2] - - # backwards compatible - if isinstance(field, dict): - self.field = field.get('field') - self.op = field.get('op') or '==' - self.value = field.get('value') - - # passed a term - elif isinstance(field, Term): - self.field = field.field - self.op = field.op - self.value = field.value - - # a string expression (or just the field) - elif isinstance(field, basestring): - - # is a term is passed - s = self._search.match(field) - if s is not None: - self.field = s.group('field') - self.op = s.group('op') - self.value = s.group('value') - - else: - self.field = field - - # is an op passed? - if isinstance(op, basestring) and op in self._ops: - self.op = op - self.value = value - else: - self.op = '==' - self.value = op - - else: - raise ValueError( - "Term does not understand the supplied field [%s]" % field) - - # we have valid fields - if self.field is None or self.op is None or self.value is None: - raise ValueError("Could not create this term [%s]" % str(self)) - - # = vs == - if self.op == '=': - self.op = '==' - - # we have valid conditions - if self.op in ['>', '>=', '<', '<=']: - if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value, basestring): - raise ValueError( - "an inequality condition cannot have multiple values [%s]" % - str(self)) - - if not is_list_like(self.value): - self.value = [self.value] - - if len(self.q): - self.eval() - - def __unicode__(self): - attrs = map(pprint_thing, (self.field, self.op, self.value)) - return "field->%s,op->%s,value->%s" % tuple(attrs) - - @property - def is_valid(self): - """ return True if this is a valid field """ - return self.field in self.q - - @property - def is_in_table(self): - """ return True if this is a valid column name for generation (e.g. an actual column in the table) """ - return self.q.get(self.field) is not None - - @property - def kind(self): - """ the kind of my field """ - return self.q.get(self.field) - - def generate(self, v): - """ create and return the op string for this TermValue """ - val = v.tostring(self.encoding) - return "(%s %s %s)" % (self.field, self.op, val) - - def eval(self): - """ set the numexpr expression for this term """ - - if not self.is_valid: - raise ValueError("query term is not valid [%s]" % str(self)) - - # convert values if we are in the table - if self.is_in_table: - values = [self.convert_value(v) for v in self.value] - else: - values = [TermValue(v, v, self.kind) for v in self.value] - - # equality conditions - if self.op in ['==', '!=']: - - # our filter op expression - if self.op == '!=': - filter_op = lambda axis, vals: not axis.isin(vals) - else: - filter_op = lambda axis, vals: axis.isin(vals) - - if self.is_in_table: - - # too many values to create the expression? 
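# Note (editorial sketch): this entire Term class is being deleted from
# io/pytables.py and re-homed in pandas/computation/pytables.py; the
# "Term = Expr" alias further down keeps the old name importable. For callers
# the visible shift is from structured terms toward plain string expressions,
# e.g. Term('index', '>', '20121114') giving way to
# Term('major_axis>20121114')-style strings; both forms appear in the
# docstring being removed above.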
- if len(values) <= self._max_selectors: - vs = [self.generate(v) for v in values] - self.condition = "(%s)" % ' | '.join(vs) - - # use a filter after reading - else: - self.filter = ( - self.field, - filter_op, - Index([v.value for v in values])) - - else: - - self.filter = ( - self.field, - filter_op, - Index([v.value for v in values])) - - else: - - if self.is_in_table: - - self.condition = self.generate(values[0]) - - else: - - raise TypeError( - "passing a filterable condition to a non-table indexer [%s]" % - str(self)) - - def convert_value(self, v): - """ convert the expression that is in the term to something that is accepted by pytables """ - - def stringify(value): - value = str(value) - if self.encoding is not None: - value = value.encode(self.encoding) - return value - - kind = _ensure_decoded(self.kind) - if kind == u'datetime64' or kind == u'datetime': - v = lib.Timestamp(v) - if v.tz is not None: - v = v.tz_convert('UTC') - return TermValue(v, v.value, kind) - elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': - v = time.mktime(v.timetuple()) - return TermValue(v, Timestamp(v), kind) - elif kind == u'integer': - v = int(float(v)) - return TermValue(v, v, kind) - elif kind == u'float': - v = float(v) - return TermValue(v, v, kind) - elif kind == u'bool': - if isinstance(v, basestring): - v = not v.strip().lower() in [ - u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] - else: - v = bool(v) - return TermValue(v, v, kind) - elif not isinstance(v, basestring): - v = stringify(v) - return TermValue(v, stringify(v), u'string') - - # string quoting - return TermValue(v, stringify(v), u'string') - - -class TermValue(object): - - """ hold a term value the we use to construct a condition/filter """ - - def __init__(self, value, converted, kind): - self.value = value - self.converted = converted - self.kind = kind - - def tostring(self, encoding): - """ quote the string if not encoded - else encode and return """ - if self.kind == u'string': - if encoding is not None: - return self.converted - return '"%s"' % self.converted - return self.converted - +Term = Expr class Coordinates(object): @@ -3951,34 +3706,21 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): self.terms = self.generate(where) # create the numexpr & the filter - if self.terms: - terms = [t for t in self.terms if t.condition is not None] - if len(terms): - self.condition = "(%s)" % ' & '.join( - [t.condition for t in terms]) - self.filter = [] - for t in self.terms: - if t.filter is not None: - self.filter.append(t.filter) + if self.terms is not None: + self.condition, self.filter = self.terms.evaluate() def generate(self, where): """ where can be a : dict,list,tuple,string """ if where is None: return None - if not isinstance(where, (list, tuple)): - where = [where] - else: - - # make this a list of we think that we only have a sigle term & no - # operands inside any terms - if not any([isinstance(w, (list, tuple, Term)) for w in where]): - - if not any([isinstance(w, basestring) and Term._search.match(w) for w in where]): - where = [where] + if isinstance(where, basestring): + pass + elif isinstance(where, (list, tuple)): + where = ' & ' .join([ "(%s)" for w in where]) queryables = self.table.queryables() - return [Term(c, queryables=queryables, encoding=self.table.encoding) for c in where] + return Expr(where, queryables=queryables, encoding=self.table.encoding) def select(self): """ diff --git a/pandas/io/tests/test_pytables.py 
b/pandas/io/tests/test_pytables.py index 6737408081f3d..e339eeb69692d 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -581,6 +581,7 @@ def test_append_frame_column_oriented(self): store.append('df1', df.ix[:, 2:]) tm.assert_frame_equal(store['df1'], df) + result = store.select('df1', '(columns=A) | (columns=B)') result = store.select('df1', 'columns=A') expected = df.reindex(columns=['A']) tm.assert_frame_equal(expected, result) @@ -1605,6 +1606,44 @@ def test_terms(self): for t in terms: store.select('p4d', t) + def test_eval(self): + """ test evaluation using new terms """ + + with ensure_clean(self.path) as store: + + wp = tm.makePanel() + p4d = tm.makePanel4D() + + # valid terms + terms = [ + dict(field='major_axis', op='>', value='20121114'), + ('major_axis', '20121114'), + ('major_axis', '>', '20121114'), + (('major_axis', ['20121114', '20121114']),), + ('major_axis', datetime.datetime(2012, 11, 14)), + 'major_axis> 20121114', + 'major_axis >20121114', + 'major_axis > 20121114', + (('minor_axis', ['A', 'B']),), + (('minor_axis', ['A', 'B']),), + ((('minor_axis', ['A', 'B']),),), + (('items', ['ItemA', 'ItemB']),), + ('items=ItemA'), + ] + + for t in terms: + store.select('wp', t) + store.select('p4d', t) + + # valid for p4d only + terms = [ + (('labels', '=', ['l1', 'l2']),), + Term('labels', '=', ['l1', 'l2']), + ] + + for t in terms: + store.select('p4d', t) + def test_series(self): s = tm.makeStringSeries() From 441285c9c64cc08b701cbf81ce78a5fa567f2c67 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 6 Jul 2013 20:27:58 -0400 Subject: [PATCH 39/48] WIP: still some debugging statements in --- pandas/computation/expr.py | 6 +- pandas/computation/pytables.py | 406 +++++++++++++++++++------------ pandas/io/pytables.py | 7 +- pandas/io/tests/test_pytables.py | 11 +- 4 files changed, 268 insertions(+), 162 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 10ca7e1083983..2104a437b1ba3 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -80,12 +80,12 @@ def __init__(self, env): def generic_visit(self, node, **kwargs): """Called if no explicit visitor function exists for a node.""" - for field, value in iter_fields(node): + for field, value in ast.iter_fields(node): if isinstance(value, list): for item in value: - if isinstance(item, AST): + if isinstance(item, ast.AST): self.visit(item, **kwargs) - elif isinstance(value, AST): + elif isinstance(value, ast.AST): self.visit(value, **kwargs) def visit(self, node, **kwargs): diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 7f070adff7054..931c0ffe7e0fb 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -1,12 +1,24 @@ -import sys +import sys, inspect import re import ast from functools import partial +import numpy as np +from datetime import datetime +import time +import pandas +import pandas.core.common as com +import pandas.lib as lib from pandas.computation import expr, ops from pandas.computation.ops import is_term from pandas.computation.expr import ExprParserError +def _ensure_decoded(s): + """ if we have bytes, decode them to unicde """ + if isinstance(s, np.bytes_): + s = s.decode('UTF-8') + return s + class Scope(expr.Scope): __slots__ = 'globals', 'locals', 'queryables' @@ -21,7 +33,7 @@ def __init__(self, name, env, side=None): def _resolve_name(self): - # must be a queryable + # must be a queryables if self.side == 'left': if self.name not in self.env.queryables: 
raise NameError('name {0!r} is not defined'.format(self.name)) @@ -30,29 +42,224 @@ def _resolve_name(self): # resolve the rhs (and allow to be None) return self.env.locals.get(self.name, self.env.globals.get(self.name,self.name)) -def format_value(q, lhs, v): - """ given a queryable, a lhs name and value, return a formatted value """ - return v - class BinOp(ops.BinOp): - def __call__(self, q): + def __init__(self, op, lhs, rhs, queryables, encoding): + super(BinOp, self).__init__(op, lhs, rhs) + self.queryables = queryables + self.encoding = encoding + self.filter = None + self.condition = None + + def prune(self, klass): + + def pr(left, right): + """ create and return a new specilized BinOp from myself """ + + if left is None: + return right + elif right is None: + return left + + k = klass + if isinstance(left, ConditionBinOp): + if isinstance(left, ConditionBinOp) and isinstance(right, ConditionBinOp): + k = JointConditionBinOp + elif isinstance(left, k): + return left + elif isinstance(right, k): + return right + + elif isinstance(left, FilterBinOp): + if isinstance(left, FilterBinOp) and isinstance(right, FilterBinOp): + k = JointFilterBinOp + elif isinstance(left, k): + return left + elif isinstance(right, k): + return right + + return k(self.op, left, right, queryables=self.queryables, encoding=self.encoding).evaluate() + left, right = self.lhs, self.rhs - # base cases if is_term(left) and is_term(right): - res = "(%s %s %s)" % (left.value,self.op,format_value(q, left.value, right.value)) + res = pr(left.value,right.value) elif not is_term(left) and is_term(right): - res = "(%s %s %s)" % (left(q),self.op,right.value) + res = pr(left.prune(klass),right.value) elif is_term(left) and not is_term(right): - res = "(%s %s %s)" % (left.value,self.op,right(q)) + res = pr(left.value,right.prune(klass)) elif not (is_term(left) or is_term(right)): - res = "(%s %s %s)" % (left(q),self.op,right(q)) + res = pr(left.prune(klass),right.prune(klass)) return res + @property + def is_valid(self): + """ return True if this is a valid field """ + return self.lhs in self.queryables + + @property + def is_in_table(self): + """ return True if this is a valid column name for generation (e.g. 
an actual column in the table) """ + return self.queryables.get(self.lhs) is not None + + @property + def kind(self): + """ the kind of my field """ + return self.queryables.get(self.lhs) + + def generate(self, v): + """ create and return the op string for this TermValue """ + val = v.tostring(self.encoding) + return "(%s %s %s)" % (self.lhs, self.op, val) + + def convert_value(self, v): + """ convert the expression that is in the term to something that is accepted by pytables """ + + def stringify(value): + value = str(value) + if self.encoding is not None: + value = value.encode(self.encoding) + return value + + kind = _ensure_decoded(self.kind) + if kind == u'datetime64' or kind == u'datetime': + v = lib.Timestamp(v) + if v.tz is not None: + v = v.tz_convert('UTC') + return TermValue(v, v.value, kind) + elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': + v = time.mktime(v.timetuple()) + return TermValue(v, Timestamp(v), kind) + elif kind == u'integer': + v = int(float(v)) + return TermValue(v, v, kind) + elif kind == u'float': + v = float(v) + return TermValue(v, v, kind) + elif kind == u'bool': + if isinstance(v, basestring): + v = not v.strip().lower() in [ + u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] + else: + v = bool(v) + return TermValue(v, v, kind) + elif not isinstance(v, basestring): + v = stringify(v) + return TermValue(v, stringify(v), u'string') + + # string quoting + return TermValue(v, stringify(v), u'string') + +class FilterBinOp(BinOp): + + def __unicode__(self): + return com.pprint_thing("[Filter : [{0}] -> [{1}]".format(self.filter[0],self.filter[1])) + + def evaluate(self): + + if not isinstance(self.lhs,basestring): + return self + + if not self.is_valid: + raise ValueError("query term is not valid [%s]" % self) + + if self.is_in_table: + return None + + import pdb; pdb.set_trace() + + if not isinstance(self.rhs, list): + self.rhs = [ self.rhs ] + values = [TermValue(v, v, self.kind) for v in self.rhs] + + # equality conditions + if self.op in ['==', '!=']: + + # our filter op expression + if self.op == '!=': + filter_op = lambda axis, vals: not axis.isin(vals) + else: + filter_op = lambda axis, vals: axis.isin(vals) + + self.filter = ( + self.lhs, + filter_op, + Index([v.value for v in values])) + + else: + + raise TypeError( + "passing a filterable condition to a non-table indexer [%s]" % + self) + + return self + +class JointFilterBinOp(FilterBinOp): + + def evaluate(self): + return self + +class ConditionBinOp(BinOp): + + _max_selectors = 31 + + def __unicode__(self): + return com.pprint_thing("[Condition : [{0}]]".format(self.condition)) + + def format(self): + """ return the actual ne format """ + return self.condition + + def evaluate(self): + + if not isinstance(self.lhs,basestring): + return self + + if not self.is_valid: + raise ValueError("query term is not valid [%s]" % self) + + # convert values if we are in the table + if not self.is_in_table: + return None + + if not isinstance(self.rhs, list): + self.rhs = [ self.rhs ] + values = [self.convert_value(v) for v in self.rhs] + + # equality conditions + if self.op in ['==', '!=']: + + # our filter op expression + if self.op == '!=': + filter_op = lambda axis, vals: not axis.isin(vals) + else: + filter_op = lambda axis, vals: axis.isin(vals) + + # too many values to create the expression? 
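# Note (editorial sketch): the selector cap reappears here, but per node
# rather than per Term. Expr.evaluate() first runs terms.prune(ConditionBinOp)
# to collapse whatever subtree can become a numexpr condition string, then
# terms.prune(FilterBinOp) for the remainder that must be applied as a
# post-read filter. A minimal usage sketch, with a made-up queryables map:
#     expr = Expr('index>20121114', queryables={'index': 'datetime64'})
#     condition, filter = expr.evaluate()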
+ if len(values) <= self._max_selectors: + vs = [self.generate(v) for v in values] + self.condition = "(%s)" % ' | '.join(vs) + + # use a filter after reading + else: + return None + + else: + + self.condition = self.generate(values[0]) + + return self + +class JointConditionBinOp(ConditionBinOp): + + def evaluate(self): + self.condition = "(%s %s %s)" % (self.lhs.condition,self.op,self.rhs.condition) + return self + class UnaryOp(ops.UnaryOp): - def __call__(self, q): + + def apply(self, func): operand = self.operand v = operand.value if is_term(operand) else operand return "%s (%s)" % (operand,v) @@ -67,10 +274,10 @@ class ExprVisitor(expr.ExprVisitor): unary_op_nodes = 'Invert' unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes)) - def __init__(self, env): + def __init__(self, env, **kwargs): for bin_op in self.bin_ops: setattr(self, 'visit_{0}'.format(self.bin_op_nodes_map[bin_op]), - lambda node, bin_op=bin_op: partial(BinOp, bin_op)) + lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs)) for unary_op in self.unary_ops: setattr(self, @@ -95,6 +302,14 @@ def visit_Compare(self, node, **kwargs): def visit_Name(self, node, side=None, **kwargs): return Term(node.id, self.env, side=side) + def visit_Attribute(self, node, **kwargs): + import pdb; pdb.set_trace() + raise NotImplementedError("attribute access is not yet supported") + + def visit_BoolOp(self, node, **kwargs): + import pdb; pdb.set_trace() + raise NotImplementedError("boolean operators are not yet supported") + class Expr(expr.Expr): """ hold a pytables like expression, comprised of possibly multiple 'terms' @@ -113,16 +328,31 @@ class Expr(expr.Expr): -------- """ - _max_selectors = 31 - - def __init__(self, expression, queryables=None, encoding=None): - self.expr = self.pre_parse(expression) - self.env = Scope(queryables=queryables,frame_level=2) - self._visitor = ExprVisitor(self.env) - self.terms = self.parse() - self.encoding = encoding + def __init__(self, expression, queryables=None, encoding=None, env=None): + self.expr = expression self.condition = None self.filter = None + self.terms = None + self._visitor = None + + if env is None: + frame = inspect.currentframe() + try: + env = Scope(lcls = frame.f_back.f_locals.copy()) + finally: + del frame + self.env = env + + if queryables is not None: + self.env.queryables.update(queryables) + self._visitor = ExprVisitor(self.env, queryables=queryables, encoding=encoding) + self.expr = self.pre_parse(self.expr) + self.terms = self.parse() + + def __unicode__(self): + if self.terms is not None: + return unicode(self.terms) + return self.expr def pre_parse(self, expression): """ transform = to == """ @@ -131,134 +361,10 @@ def pre_parse(self, expression): def evaluate(self): """ create and return the numexpr condition and filter """ - import pdb; pdb.set_trace() - terms = [] - filter = [] - - self.terms(self.env) - #for t in self.terms: - - terms = [t for t in self.terms if t.condition is not None] - if len(terms): - self.condition = "(%s)" % ' & '.join( - [t.condition for t in terms]) - self.filter = [] - for t in self.terms: - if t.filter is not None: - self.filter.append(t.filter) - - - @property - def is_valid(self): - """ return True if this is a valid field """ - return self.field in self.q - - @property - def is_in_table(self): - """ return True if this is a valid column name for generation (e.g. 
an actual column in the table) """ - return self.q.get(self.field) is not None - - @property - def kind(self): - """ the kind of my field """ - return self.q.get(self.field) - - def generate(self, v): - """ create and return the op string for this TermValue """ - val = v.tostring(self.encoding) - return "(%s %s %s)" % (self.field, self.op, val) - - """ set the numexpr expression for this term """ - - if not self.is_valid: - raise ValueError("query term is not valid [%s]" % str(self)) - - # convert values if we are in the table - if self.is_in_table: - values = [self.convert_value(v) for v in self.value] - else: - values = [TermValue(v, v, self.kind) for v in self.value] - - # equality conditions - if self.op in ['==', '!=']: - - # our filter op expression - if self.op == '!=': - filter_op = lambda axis, vals: not axis.isin(vals) - else: - filter_op = lambda axis, vals: axis.isin(vals) - - if self.is_in_table: - - # too many values to create the expression? - if len(values) <= self._max_selectors: - vs = [self.generate(v) for v in values] - self.condition = "(%s)" % ' | '.join(vs) - - # use a filter after reading - else: - self.filter = ( - self.field, - filter_op, - Index([v.value for v in values])) - - else: - - self.filter = ( - self.field, - filter_op, - Index([v.value for v in values])) - - else: - - if self.is_in_table: - - self.condition = self.generate(values[0]) - - else: - - raise TypeError( - "passing a filterable condition to a non-table indexer [%s]" % - str(self)) - - def convert_value(self, v): - """ convert the expression that is in the term to something that is accepted by pytables """ - - def stringify(value): - value = str(value) - if self.encoding is not None: - value = value.encode(self.encoding) - return value - - kind = _ensure_decoded(self.kind) - if kind == u'datetime64' or kind == u'datetime': - v = lib.Timestamp(v) - if v.tz is not None: - v = v.tz_convert('UTC') - return TermValue(v, v.value, kind) - elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': - v = time.mktime(v.timetuple()) - return TermValue(v, Timestamp(v), kind) - elif kind == u'integer': - v = int(float(v)) - return TermValue(v, v, kind) - elif kind == u'float': - v = float(v) - return TermValue(v, v, kind) - elif kind == u'bool': - if isinstance(v, basestring): - v = not v.strip().lower() in [ - u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] - else: - v = bool(v) - return TermValue(v, v, kind) - elif not isinstance(v, basestring): - v = stringify(v) - return TermValue(v, stringify(v), u'string') - - # string quoting - return TermValue(v, stringify(v), u'string') + self.condition = self.terms.prune(ConditionBinOp) + self.filter = self.terms.prune(FilterBinOp) + return self.condition, self.filter class TermValue(object): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5e45cc4d45e3c..e229301e96ea8 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3714,10 +3714,11 @@ def generate(self, where): if where is None: return None + import pdb; pd.set_trace() if isinstance(where, basestring): pass elif isinstance(where, (list, tuple)): - where = ' & ' .join([ "(%s)" for w in where]) + where = ' & ' .join([ "(%s)" % w for w in where]) queryables = self.table.queryables() return Expr(where, queryables=queryables, encoding=self.table.encoding) @@ -3727,7 +3728,7 @@ def select(self): generate the selection """ if self.condition is not None: - return self.table.table.readWhere(self.condition, start=self.start, stop=self.stop) + return 
self.table.table.readWhere(self.condition.format(), start=self.start, stop=self.stop) elif self.coordinates is not None: return self.table.table.readCoordinates(self.coordinates) return self.table.table.read(start=self.start, stop=self.stop) @@ -3739,7 +3740,7 @@ def select_coords(self): if self.condition is None: return np.arange(self.table.nrows) - return self.table.table.getWhereList(self.condition, start=self.start, stop=self.stop, sort=True) + return self.table.table.getWhereList(self.condition.format(), start=self.start, stop=self.stop, sort=True) # utilities ### diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index e339eeb69692d..00cce4a22279d 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -581,21 +581,20 @@ def test_append_frame_column_oriented(self): store.append('df1', df.ix[:, 2:]) tm.assert_frame_equal(store['df1'], df) - result = store.select('df1', '(columns=A) | (columns=B)') result = store.select('df1', 'columns=A') expected = df.reindex(columns=['A']) tm.assert_frame_equal(expected, result) - # this isn't supported - self.assertRaises(TypeError, store.select, 'df1', ( - 'columns=A', Term('index', '>', df.index[4]))) - # selection on the non-indexable result = store.select( - 'df1', ('columns=A', Term('index', '=', df.index[0:4]))) + 'df1', ('columns=A', Term('index=df.index[0:4]'))) expected = df.reindex(columns=['A'], index=df.index[0:4]) tm.assert_frame_equal(expected, result) + # this isn't supported + self.assertRaises(TypeError, store.select, 'df1', ( + 'columns=A', Term('index', '>', df.index[4]))) + def test_append_with_different_block_ordering(self): #GH 4096; using same frames, but different block orderings From 05a005f4629cbc065728f6853fe6ae426e6fbd35 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 6 Jul 2013 21:35:40 -0400 Subject: [PATCH 40/48] WIP: conditions working now, filtering still only ok; good parsing of attributes, subscripting, e.g. df.index[0:4] works!
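A hedged usage sketch of what now parses; store and df are the test fixtures
exercised in the test changes below, not new API:

>>> store.select('df1', ('columns=A', Term('index=df.index[0:4]')))
>>> store.select('df', [Term('B>0'), Term('index>df.index[3]')])

Attribute and subscript nodes are resolved eagerly against the captured scope
(visit_Attribute / visit_Subscript) and wrapped in a Value term, so only the
already-resolved constant reaches the condition/filter builders.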
--- pandas/computation/ops.py | 7 +++ pandas/computation/pytables.py | 81 ++++++++++++++++++++++---------- pandas/io/pytables.py | 11 +++-- pandas/io/tests/test_pytables.py | 19 ++++---- 4 files changed, 79 insertions(+), 39 deletions(-) diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 76e5497d48175..926b9bf9bc509 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -83,6 +83,13 @@ class Constant(Term): def __init__(self, value, env): super(Constant, self).__init__(value, env) +class Value(Term): + """ a resolved value """ + def __init__(self, value, env, name=None): + self.name = name + self.env = env + self.value = value + self.type = type(self.value) def _print_operand(opr): return opr.name if is_term(opr) else unicode(opr) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 931c0ffe7e0fb..f3cb4f45874c3 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -10,8 +10,10 @@ import pandas.core.common as com import pandas.lib as lib from pandas.computation import expr, ops -from pandas.computation.ops import is_term +from pandas.computation.ops import is_term, Value from pandas.computation.expr import ExprParserError +from pandas import Index +from pandas.core.common import is_list_like def _ensure_decoded(s): """ if we have bytes, decode them to unicde """ @@ -93,6 +95,14 @@ def pr(left, right): return res + def conform(self, rhs): + """ inplace conform rhs """ + if not is_list_like(rhs): + rhs = [ rhs ] + if hasattr(self.rhs,'ravel'): + rhs = rhs.ravel() + return rhs + @property def is_valid(self): """ return True if this is a valid field """ @@ -167,11 +177,8 @@ def evaluate(self): if self.is_in_table: return None - import pdb; pdb.set_trace() - - if not isinstance(self.rhs, list): - self.rhs = [ self.rhs ] - values = [TermValue(v, v, self.kind) for v in self.rhs] + rhs = self.conform(self.rhs) + values = [TermValue(v, v, self.kind) for v in rhs] # equality conditions if self.op in ['==', '!=']: @@ -223,9 +230,8 @@ def evaluate(self): if not self.is_in_table: return None - if not isinstance(self.rhs, list): - self.rhs = [ self.rhs ] - values = [self.convert_value(v) for v in self.rhs] + rhs = self.conform(self.rhs) + values = [self.convert_value(v) for v in rhs] # equality conditions if self.op in ['==', '!=']: @@ -303,8 +309,30 @@ def visit_Name(self, node, side=None, **kwargs): return Term(node.id, self.env, side=side) def visit_Attribute(self, node, **kwargs): - import pdb; pdb.set_trace() - raise NotImplementedError("attribute access is not yet supported") + attr = node.attr + value = node.value + + # resolve the value + return getattr(self.visit(value).value,attr) + + def visit_Subscript(self, node, **kwargs): + value = self.visit(node.value) + slobj = self.visit(node.slice) + + return Value(value[slobj],self.env) + + def visit_Slice(self, node, **kwargs): + lower = node.lower + if lower is not None: + lower = self.visit(lower).value + upper = node.upper + if upper is not None: + upper = self.visit(upper).value + step = node.step + if step is not None: + step = self.visit(step).value + + return slice(lower,upper,step) def visit_BoolOp(self, node, **kwargs): import pdb; pdb.set_trace() @@ -328,25 +356,33 @@ class Expr(expr.Expr): -------- """ - def __init__(self, expression, queryables=None, encoding=None, env=None): + def __init__(self, expression, queryables=None, encoding=None, lcls=None): + if isinstance(expression, Expr): + expression = str(expression) self.expr = expression 
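# Note (editorial sketch): scope capture happens a few lines below: Expr walks
# one frame up via inspect.currentframe() and merges the caller's f_locals
# into its Scope, which is how a query string like 'index>df.index[4]' can
# resolve df without it being passed in explicitly. The "finally: del frame"
# follows the inspect module's standard advice for breaking frame reference
# cycles.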
self.condition = None self.filter = None self.terms = None self._visitor = None - if env is None: - frame = inspect.currentframe() - try: - env = Scope(lcls = frame.f_back.f_locals.copy()) - finally: - del frame - self.env = env + # add current locals scope + frame = inspect.currentframe() + try: + if lcls is None: + lcls = dict() + lcls.update(frame.f_back.f_locals) + self.env = Scope(lcls = lcls) + finally: + del frame if queryables is not None: + + # if using the old format, this will raise + if not isinstance(queryables, dict): + raise TypeError("Expr must be called with a single-string expression") + self.env.queryables.update(queryables) self._visitor = ExprVisitor(self.env, queryables=queryables, encoding=encoding) - self.expr = self.pre_parse(self.expr) self.terms = self.parse() def __unicode__(self): @@ -354,11 +390,6 @@ def __unicode__(self): return unicode(self.terms) return self.expr - def pre_parse(self, expression): - """ transform = to == """ - expression = re.sub("=+","==",expression) - return expression - def evaluate(self): """ create and return the numexpr condition and filter """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e229301e96ea8..f072d484f223e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3714,14 +3714,15 @@ def generate(self, where): if where is None: return None - import pdb; pd.set_trace() - if isinstance(where, basestring): - pass - elif isinstance(where, (list, tuple)): + lcls = dict() + if isinstance(where, (list, tuple)): + for w in where: + if isinstance(w, Term): + lcls.update(w.env.locals) where = ' & ' .join([ "(%s)" % w for w in where]) queryables = self.table.queryables() - return Expr(where, queryables=queryables, encoding=self.table.encoding) + return Expr(where, queryables=queryables, encoding=self.table.encoding, lcls=lcls) def select(self): """ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 00cce4a22279d..8ca0ffee50d40 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -593,7 +593,7 @@ def test_append_frame_column_oriented(self): # this isn't supported self.assertRaises(TypeError, store.select, 'df1', ( - 'columns=A', Term('index', '>', df.index[4]))) + 'columns=A', Term('index>df.index[4]'))) def test_append_with_different_block_ordering(self): @@ -816,7 +816,7 @@ def test_append_with_data_columns(self): # data column searching (with an indexable and a data_columns) result = store.select( - 'df', [Term('B>0'), Term('index', '>', df.index[3])]) + 'df', [Term('B>0'), Term('index>df.index[3]')]) df_new = df.reindex(index=df.index[4:]) expected = df_new[df_new.B > 0] tm.assert_frame_equal(result, expected) @@ -828,7 +828,7 @@ def test_append_with_data_columns(self): df_new['string'][5:6] = 'bar' _maybe_remove(store, 'df') store.append('df', df_new, data_columns=['string']) - result = store.select('df', [Term('string', '=', 'foo')]) + result = store.select('df', [Term('string=foo')]) expected = df_new[df_new.string == 'foo'] tm.assert_frame_equal(result, expected) @@ -874,14 +874,14 @@ def check_col(key,name,size): _maybe_remove(store, 'df') store.append( 'df', df_new, data_columns=['A', 'B', 'string', 'string2']) - result = store.select('df', [Term('string', '=', 'foo'), Term( + result = store.select('df', [Term('string=foo'), Term( 'string2=foo'), Term('A>0'), Term('B<0')]) expected = df_new[(df_new.string == 'foo') & ( df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)] tm.assert_frame_equal(result, expected) # yield an 
empty frame - result = store.select('df', [Term('string', '=', 'foo'), Term( + result = store.select('df', [Term('string=foo'), Term( 'string2=cool')]) expected = df_new[(df_new.string == 'foo') & ( df_new.string2 == 'cool')] @@ -2318,10 +2318,11 @@ def test_frame_select(self): store.put('frame', df, table=True) date = df.index[len(df) // 2] - crit1 = ('index', '>=', date) - crit2 = ('columns', ['A', 'D']) - crit3 = ('columns', 'A') + crit1 = ('index>=date') + crit2 = ("columns=['A', 'D']") + crit3 = ('columns=A') + import pdb; pdb.set_trace() result = store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) @@ -2668,7 +2669,7 @@ def test_legacy_table_read(self): # old version warning warnings.filterwarnings('ignore', category=IncompatibilityWarning) self.assertRaises( - Exception, store.select, 'wp1', Term('minor_axis', '=', 'B')) + Exception, store.select, 'wp1', Term('minor_axis=B')) df2 = store.select('df2') store.select('df2', Term('index', '>', df2.index[2])) From 22b4a93b209fa7bdf9bb7f2e0f8ca6df35bfb376 Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 7 Jul 2013 10:39:19 -0400 Subject: [PATCH 41/48] TST: more test changes --- pandas/computation/expr.py | 29 +++++++- pandas/computation/pytables.py | 52 ++++++-------- pandas/io/pytables.py | 5 +- pandas/io/tests/test_pytables.py | 116 +++++++++++-------------------- 4 files changed, 95 insertions(+), 107 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 2104a437b1ba3..b8acab7cf9edb 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -11,7 +11,7 @@ from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops from pandas.computation.ops import _cmp_ops_syms, _bool_ops_syms from pandas.computation.ops import _arith_ops_syms, _unary_ops_syms -from pandas.computation.ops import Term, Constant +from pandas.computation.ops import Term, Constant, Value class Scope(object): __slots__ = 'globals', 'locals' @@ -96,7 +96,10 @@ def visit(self, node, **kwargs): node = ast.fix_missing_locations(ast.parse(preparse(node))) method = 'visit_' + node.__class__.__name__ - visitor = getattr(self, method, self.generic_visit) + visitor = getattr(self, method, None) + if visitor is None: + visitor = self.generic_visit + print method return visitor(node, **kwargs) def visit_Module(self, node, **kwargs): @@ -124,12 +127,34 @@ def visit_UnaryOp(self, node, **kwargs): op = self.visit(node.op) return op(self.visit(node.operand)) + def visit_List(self, node, **kwargs): + return Value([ self.visit(e) for e in node.elts ], self.env) + def visit_Name(self, node, **kwargs): return Term(node.id, self.env) def visit_Num(self, node, **kwargs): return Constant(node.n, self.env) + def visit_Subscript(self, node, **kwargs): + value = self.visit(node.value) + slobj = self.visit(node.slice) + + return Value(value[slobj],self.env) + + def visit_Slice(self, node, **kwargs): + lower = node.lower + if lower is not None: + lower = self.visit(lower).value + upper = node.upper + if upper is not None: + upper = self.visit(upper).value + step = node.step + if step is not None: + step = self.visit(step).value + + return slice(lower,upper,step) + def visit_Compare(self, node, **kwargs): ops = node.ops comps = node.comparators diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index f3cb4f45874c3..2b04c6fb9e12a 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -166,6 +166,10 @@ class 
FilterBinOp(BinOp): def __unicode__(self): return com.pprint_thing("[Filter : [{0}] -> [{1}]".format(self.filter[0],self.filter[1])) + def format(self): + """ return the actual filter format """ + return [ self.filter ] + def evaluate(self): if not isinstance(self.lhs,basestring): @@ -204,6 +208,9 @@ def evaluate(self): class JointFilterBinOp(FilterBinOp): + def format(self): + raise NotImplementedError("unable to collapse Joint Filters") + def evaluate(self): return self @@ -298,16 +305,6 @@ def visit_Module(self, node, **kwargs): body = node.body[0] return self.visit(body) - def visit_Compare(self, node, **kwargs): - ops = node.ops - comps = node.comparators - for op, comp in zip(ops, comps): - node = self.visit(op)(self.visit(node.left,side='left'), self.visit(comp,side='right')) - return node - - def visit_Name(self, node, side=None, **kwargs): - return Term(node.id, self.env, side=side) - def visit_Attribute(self, node, **kwargs): attr = node.attr value = node.value @@ -315,28 +312,24 @@ def visit_Attribute(self, node, **kwargs): # resolve the value return getattr(self.visit(value).value,attr) - def visit_Subscript(self, node, **kwargs): - value = self.visit(node.value) - slobj = self.visit(node.slice) - - return Value(value[slobj],self.env) + def visit_Call(self, node, **kwargs): + if not isinstance(node.func, ast.Name): + raise TypeError("Only named functions are supported") - def visit_Slice(self, node, **kwargs): - lower = node.lower - if lower is not None: - lower = self.visit(lower).value - upper = node.upper - if upper is not None: - upper = self.visit(upper).value - step = node.step - if step is not None: - step = self.visit(step).value + res = self.visit(node.func) + if res is None: + raise ValueError("Invalid function call {0}".format(node.func.id)) + return res - return slice(lower,upper,step) + def visit_Compare(self, node, **kwargs): + ops = node.ops + comps = node.comparators + for op, comp in zip(ops, comps): + node = self.visit(op)(self.visit(node.left,side='left'), self.visit(comp,side='right')) + return node - def visit_BoolOp(self, node, **kwargs): - import pdb; pdb.set_trace() - raise NotImplementedError("boolean operators are not yet supported") + def visit_Name(self, node, side=None, **kwargs): + return Term(node.id, self.env, side=side) class Expr(expr.Expr): @@ -395,6 +388,7 @@ def evaluate(self): self.condition = self.terms.prune(ConditionBinOp) self.filter = self.terms.prune(FilterBinOp) + return self.condition, self.filter class TermValue(object): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f072d484f223e..68f2b8698d960 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2913,8 +2913,8 @@ def process_axes(self, obj, columns=None): obj = obj.reindex_axis(labels, axis=axis, copy=False) # apply the selection filters (but keep in the same order) - if self.selection.filter: - for field, op, filt in self.selection.filter: + if self.selection.filter is not None: + for field, op, filt in self.selection.filter.format(): def process_filter(field, filt): @@ -3719,6 +3719,7 @@ def generate(self, where): for w in where: if isinstance(w, Term): lcls.update(w.env.locals) + where = ' & ' .join([ "(%s)" % w for w in where]) queryables = self.table.queryables() diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 8ca0ffee50d40..76ad477a43ccb 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1358,7 +1358,7 @@ def compare(a,b): assert_frame_equal(result,df) # select with 
tz aware - compare(store.select('df_tz',where=Term('A','>=',df.A[3])),df[df.A>=df.A[3]]) + compare(store.select('df_tz',where=Term('A>=df.A[3]')),df[df.A>=df.A[3]]) _maybe_remove(store, 'df_tz') df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=range(5)) @@ -1440,14 +1440,14 @@ def test_remove_where(self): with ensure_clean(self.path) as store: # non-existance - crit1 = Term('index', '>', 'foo') + crit1 = Term('index>foo') self.assertRaises(KeyError, store.remove, 'a', [crit1]) # try to remove non-table (with crit) # non-table ok (where = None) wp = tm.makePanel() store.put('wp', wp, table=True) - store.remove('wp', [('minor_axis', ['A', 'D'])]) + store.remove('wp', [("minor_axis=['A', 'D']")]) rs = store.select('wp') expected = wp.reindex(minor_axis=['B', 'C']) tm.assert_panel_equal(rs, expected) @@ -1479,7 +1479,7 @@ def test_remove_crit(self): # group row removal date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10]) - crit4 = Term('major_axis', date4) + crit4 = Term('major_axis=date4') store.put('wp3', wp, table=True) n = store.remove('wp3', where=[crit4]) assert(n == 36) @@ -1491,8 +1491,8 @@ def test_remove_crit(self): store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] - crit1 = Term('major_axis', '>', date) - crit2 = Term('minor_axis', ['A', 'D']) + crit1 = Term('major_axis>date') + crit2 = Term("minor_axis=['A', 'D']") n = store.remove('wp', where=[crit1]) assert(n == 56) @@ -1548,9 +1548,9 @@ def test_terms(self): # some invalid terms terms = [ - ['minor', ['A', 'B']], - ['index', ['20121114']], - ['index', ['20121114', '20121114']], + ["minor=['A', 'B']"], + ["index=['20121114']"], + ["index=['20121114', '20121114']"], ] for t in terms: self.assertRaises(Exception, store.select, 'wp', t) @@ -1558,75 +1558,43 @@ def test_terms(self): self.assertRaises(Exception, Term.__init__) self.assertRaises(Exception, Term.__init__, 'blah') self.assertRaises(Exception, Term.__init__, 'index') - self.assertRaises(Exception, Term.__init__, 'index', '==') - self.assertRaises(Exception, Term.__init__, 'index', '>', 5) + self.assertRaises(TypeError, Term.__init__, 'index', '==') + self.assertRaises(TypeError, Term.__init__, 'index', '>', 5) # panel result = store.select('wp', [Term( - 'major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])]) + 'major_axis<20000108'), Term("minor_axis=['A', 'B']")]) expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) # p4d result = store.select('p4d', [Term('major_axis<20000108'), - Term('minor_axis', '=', ['A', 'B']), - Term('items', '=', ['ItemA', 'ItemB'])]) + Term("minor_axis=['A', 'B']"), + Term("items=['ItemA', 'ItemB']")]) expected = p4d.truncate(after='20000108').reindex( minor=['A', 'B'], items=['ItemA', 'ItemB']) tm.assert_panel4d_equal(result, expected) - # valid terms - terms = [ - dict(field='major_axis', op='>', value='20121114'), - ('major_axis', '20121114'), - ('major_axis', '>', '20121114'), - (('major_axis', ['20121114', '20121114']),), - ('major_axis', datetime.datetime(2012, 11, 14)), - 'major_axis> 20121114', - 'major_axis >20121114', - 'major_axis > 20121114', - (('minor_axis', ['A', 'B']),), - (('minor_axis', ['A', 'B']),), - ((('minor_axis', ['A', 'B']),),), - (('items', ['ItemA', 'ItemB']),), - ('items=ItemA'), - ] - - for t in terms: - store.select('wp', t) - store.select('p4d', t) - - # valid for p4d only + # back compat invalid terms terms = [ - (('labels', '=', ['l1', 'l2']),), - 
Term('labels', '=', ['l1', 'l2']), + dict(field='major_axis', op='>', value='20121114') ] - for t in terms: - store.select('p4d', t) - - def test_eval(self): - """ test evaluation using new terms """ - - with ensure_clean(self.path) as store: - - wp = tm.makePanel() - p4d = tm.makePanel4D() + self.assertRaises(TypeError, Term.__init__, t) # valid terms terms = [ - dict(field='major_axis', op='>', value='20121114'), - ('major_axis', '20121114'), - ('major_axis', '>', '20121114'), - (('major_axis', ['20121114', '20121114']),), + ('major_axis=20121114'), + ('major_axis>20121114'), + (("major_axis=['20121114', '20121114']"),), ('major_axis', datetime.datetime(2012, 11, 14)), 'major_axis> 20121114', 'major_axis >20121114', 'major_axis > 20121114', - (('minor_axis', ['A', 'B']),), - (('minor_axis', ['A', 'B']),), - ((('minor_axis', ['A', 'B']),),), - (('items', ['ItemA', 'ItemB']),), + (("minor_axis=['A', 'B']"),), + (("minor_axis=['A', 'B']"),), + ((("minor_axis==['A', 'B']"),),), + (("items=['ItemA', 'ItemB']"),), ('items=ItemA'), ] @@ -1636,8 +1604,8 @@ def test_eval(self): # valid for p4d only terms = [ - (('labels', '=', ['l1', 'l2']),), - Term('labels', '=', ['l1', 'l2']), + (("labels=['l1', 'l2']"),), + Term("labels=['l1', 'l2']"), ] for t in terms: @@ -2017,7 +1985,7 @@ def test_select(self): _maybe_remove(store, 'wp') store.append('wp', wp) items = ['Item%03d' % i for i in xrange(80)] - result = store.select('wp', Term('items', items)) + result = store.select('wp', Term('items=items')) expected = wp.reindex(items=items) tm.assert_panel_equal(expected, result) @@ -2034,7 +2002,7 @@ def test_select(self): tm.assert_frame_equal(expected, result) # equivalentsly - result = store.select('df', [('columns', ['A', 'B'])]) + result = store.select('df', [("columns=['A', 'B']")]) expected = df.reindex(columns=['A', 'B']) tm.assert_frame_equal(expected, result) @@ -2067,7 +2035,7 @@ def test_select_dtypes(self): df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300))) _maybe_remove(store, 'df') store.append('df', df, data_columns=['ts', 'A']) - result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01'))]) + result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")]) expected = df[df.ts >= Timestamp('2012-02-01')] tm.assert_frame_equal(expected, result) @@ -2120,30 +2088,30 @@ def test_select_with_many_inputs(self): store.append('df', df, data_columns=['ts', 'A', 'B', 'users']) # regular select - result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01'))]) + result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")]) expected = df[df.ts >= Timestamp('2012-02-01')] tm.assert_frame_equal(expected, result) # small selector - result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01')),Term('users',['a','b','c'])]) + result = store.select('df', [Term("ts>=Timestamp('2012-02-01') & users=['a','b','c']")]) expected = df[ (df.ts >= Timestamp('2012-02-01')) & df.users.isin(['a','b','c']) ] tm.assert_frame_equal(expected, result) # big selector along the columns selector = [ 'a','b','c' ] + [ 'a%03d' % i for i in xrange(60) ] - result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01')),Term('users',selector)]) + result = store.select('df', [Term("ts>=Timestamp('2012-02-01')"),Term('users=selector')]) expected = df[ (df.ts >= Timestamp('2012-02-01')) & df.users.isin(selector) ] tm.assert_frame_equal(expected, result) selector = range(100,200) - result = store.select('df', [Term('B', selector)]) + result = 
store.select('df', [Term('B=selector')]) expected = df[ df.B.isin(selector) ] tm.assert_frame_equal(expected, result) self.assert_(len(result) == 100) # big selector along the index selector = Index(df.ts[0:100].values) - result = store.select('df', [Term('ts', selector)]) + result = store.select('df', [Term('ts=selector')]) expected = df[ df.ts.isin(selector.values) ] tm.assert_frame_equal(expected, result) self.assert_(len(result) == 100) @@ -2298,15 +2266,15 @@ def test_panel_select(self): store.put('wp', wp, table=True) date = wp.major_axis[len(wp.major_axis) // 2] - crit1 = ('major_axis', '>=', date) - crit2 = ('minor_axis', '=', ['A', 'D']) + crit1 = ('major_axis>=date') + crit2 = ("minor_axis=['A', 'D']") result = store.select('wp', [crit1, crit2]) expected = wp.truncate(before=date).reindex(minor=['A', 'D']) tm.assert_panel_equal(result, expected) result = store.select( - 'wp', ['major_axis>=20000124', ('minor_axis', '=', ['A', 'B'])]) + 'wp', ['major_axis>=20000124', ("minor_axis=['A', 'B']")]) expected = wp.truncate(before='20000124').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) @@ -2318,7 +2286,7 @@ def test_frame_select(self): store.put('frame', df, table=True) date = df.index[len(df) // 2] - crit1 = ('index>=date') + crit1 = Term('index>=date') crit2 = ("columns=['A', 'D']") crit3 = ('columns=A') - import pdb; pdb.set_trace() result = store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) @@ -2569,13 +2537,13 @@ def test_start_stop(self): store.append('df', df) result = store.select( - 'df', [Term("columns", "=", ["A"])], start=0, stop=5) + 'df', [Term("columns=['A']")], start=0, stop=5) expected = df.ix[0:4, ['A']] tm.assert_frame_equal(result, expected) # out of range result = store.select( - 'df', [Term("columns", "=", ["A"])], start=30, stop=40) + 'df', [Term("columns=['A']")], start=30, stop=40) assert(len(result) == 0) assert(type(result) == DataFrame) @@ -2588,7 +2556,7 @@ def test_select_filter_corner(self): with ensure_clean(self.path) as store: store.put('frame', df, table=True) - crit = Term('columns', df.columns[:75]) + crit = Term('columns=df.columns[:75]') result = store.select('frame', [crit]) tm.assert_frame_equal(result, df.ix[:, df.columns[:75]]) @@ -2672,7 +2640,7 @@ def test_legacy_table_read(self): Exception, store.select, 'wp1', Term('minor_axis=B')) df2 = store.select('df2') - store.select('df2', Term('index', '>', df2.index[2])) + store.select('df2', Term('index>df2.index[2]')) warnings.filterwarnings('always', category=IncompatibilityWarning) finally: From ca292c20f92f36483d1567946da7b2fce7d7176d Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 7 Jul 2013 10:45:22 -0400 Subject: [PATCH 42/48] BUG: added HDFStore to inherit from StringMixin --- pandas/io/pytables.py | 2 +- pandas/io/tests/test_pytables.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 68f2b8698d960..14d67b9313ff3 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -229,7 +229,7 @@ def read_hdf(path_or_buf, key, **kwargs): f(path_or_buf, False) -class HDFStore(object): +class HDFStore(StringMixin): """ dict-like IO interface for storing pandas objects in PyTables diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 76ad477a43ccb..4b5ca28e702a7 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -797,6 +797,7 @@ def check_col(key,name,size): def test_append_with_data_columns(self): + import pdb; pdb.set_trace() with ensure_clean(self.path) as store: df = tm.makeTimeDataFrame() df.loc[:,'B'].iloc[0] = 1.
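The visit_Index handler added in the next patch completes the subscript support from PATCH 41, all riding on ExprVisitor's getattr-based dispatch, where visit() routes each ast node to a visit_<ClassName> method. Below is a minimal standalone sketch of that pattern, illustrative only and not part of the patch series: MiniVisitor and env are hypothetical names, and it assumes the stdlib ast module of this Python 2 era, where a plain subscript such as df.index[4] arrives wrapped in an ast.Index node.

import ast

class MiniVisitor(object):
    """ toy getattr-based dispatch, resolving expressions like df.index[4] """

    def __init__(self, env):
        # env stands in for Scope: a plain dict of name -> object
        self.env = env

    def visit(self, node):
        # route on the node's class name, e.g. Subscript -> visit_Subscript
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, None)
        if visitor is None:
            raise NotImplementedError("{0} not yet supported".format(method))
        return visitor(node)

    def visit_Module(self, node):
        return self.visit(node.body[0])

    def visit_Expr(self, node):
        return self.visit(node.value)

    def visit_Name(self, node):
        return self.env[node.id]

    def visit_Num(self, node):
        return node.n

    def visit_Attribute(self, node):
        # df.index -> getattr(<resolved df>, 'index')
        return getattr(self.visit(node.value), node.attr)

    def visit_Index(self, node):
        # the [4] of df.index[4]
        return self.visit(node.value)

    def visit_Subscript(self, node):
        return self.visit(node.value)[self.visit(node.slice)]

So MiniVisitor({'df': df}).visit(ast.parse('df.index[4]')) walks Module -> Expr -> Subscript -> (Attribute, Index) and returns df.index[4], which is what the Term/Value resolution in these patches does with the real Scope.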
From dfef6175140e7cfede80260c1aa5779733a57bda Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 7 Jul 2013 10:53:14 -0400 Subject: [PATCH 43/48] BUG: process visit_Index --- pandas/computation/expr.py | 6 ++++++ pandas/io/tests/test_pytables.py | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index b8acab7cf9edb..22210cb1fe9e7 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -136,13 +136,19 @@ def visit_Name(self, node, **kwargs): def visit_Num(self, node, **kwargs): return Constant(node.n, self.env) + def visit_Index(self, node, **kwargs): + """ df.index[4] """ + return self.visit(node.value).value + def visit_Subscript(self, node, **kwargs): + """ df.index[4:6] """ value = self.visit(node.value) slobj = self.visit(node.slice) return Value(value[slobj],self.env) def visit_Slice(self, node, **kwargs): + """ df.index[slice(4,6)] """ lower = node.lower if lower is not None: lower = self.visit(lower).value diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 4b5ca28e702a7..76ad477a43ccb 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -797,7 +797,6 @@ def check_col(key,name,size): def test_append_with_data_columns(self): - import pdb; pdb.set_trace() with ensure_clean(self.path) as store: df = tm.makeTimeDataFrame() df.loc[:,'B'].iloc[0] = 1. From b168fb3df7d6924a2d6b60b898a66cc1edadab5a Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 7 Jul 2013 16:30:12 -0400 Subject: [PATCH 44/48] ENH: use not_implemented function call in ExprVisitor ENH: support Load context in Attribute only --- pandas/computation/expr.py | 25 ++++++++++--------------- pandas/computation/pytables.py | 7 +++++-- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 22210cb1fe9e7..bb028d6eec1c2 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -78,15 +78,8 @@ def __init__(self, env): lambda node, unary_op=unary_op: partial(UnaryOp, unary_op)) self.env = env - def generic_visit(self, node, **kwargs): - """Called if no explicit visitor function exists for a node.""" - for field, value in ast.iter_fields(node): - if isinstance(value, list): - for item in value: - if isinstance(item, ast.AST): - self.visit(item, **kwargs) - elif isinstance(value, ast.AST): - self.visit(value, **kwargs) + def not_implemented(self, s): + raise NotImplementedError("{0} not yet supported".format(s)) def visit(self, node, **kwargs): if not (isinstance(node, ast.AST) or isinstance(node, basestring)): @@ -98,8 +91,7 @@ def visit(self, node, **kwargs): method = 'visit_' + node.__class__.__name__ visitor = getattr(self, method, None) if visitor is None: - visitor = self.generic_visit - print method + self.not_implemented("ast visitor [{0}]".format(method)) return visitor(node, **kwargs) def visit_Module(self, node, **kwargs): @@ -123,7 +115,7 @@ def visit_BinOp(self, node, **kwargs): def visit_UnaryOp(self, node, **kwargs): if isinstance(node.op, ast.Not): - raise NotImplementedError("not operator not yet supported") + self.not_implemented('not operator') op = self.visit(node.op) return op(self.visit(node.operand)) @@ -136,6 +128,9 @@ def visit_Name(self, node, **kwargs): def visit_Num(self, node, **kwargs): return Constant(node.n, self.env) + def visit_Str(self, node, **kwargs): + return Value(node.s, self.env) + def visit_Index(self, node, **kwargs): """ df.index[4] """ return
self.visit(node.value).value @@ -183,13 +178,13 @@ def visit_Call(self, node, **kwargs): if node.func.id not in valid_ops: raise ValueError("Only {0} are supported".format(valid_ops)) - raise NotImplementedError("function calls not yet supported") + self.not_implemented('function calls') def visit_Attribute(self, node, **kwargs): - raise NotImplementedError("attribute access is not yet supported") + self.not_implemented('attribute access') def visit_BoolOp(self, node, **kwargs): - raise NotImplementedError("boolean operators are not yet supported") + self.not_implemented('boolean operators') class Expr(StringMixin): diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 2b04c6fb9e12a..6be9c67f443cf 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -309,8 +309,11 @@ def visit_Attribute(self, node, **kwargs): attr = node.attr value = node.value - # resolve the value - return getattr(self.visit(value).value,attr) + ctx = node.ctx.__class__ + if ctx == ast.Load: + # resolve the value + return getattr(self.visit(value).value,attr) + raise ValueError("Invalid Attribute context {0}".format(ctx.__name__)) def visit_Call(self, node, **kwargs): if not isinstance(node.func, ast.Name): raise TypeError("Only named functions are supported") From 5fac7495bd123d60ec7fc323f73f482251ddf6f9 Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 7 Jul 2013 17:29:00 -0400 Subject: [PATCH 45/48] BUG: fixed scoping issues by _ensure_term at the top-level --- pandas/computation/expr.py | 25 +++++++++++++++++++-- pandas/computation/ops.py | 3 +++ pandas/computation/pytables.py | 38 ++++++++++++++++++++------------ pandas/io/pytables.py | 32 +++++++++++++++++---------- pandas/io/tests/test_pytables.py | 14 ++++++------ 5 files changed, 77 insertions(+), 35 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index bb028d6eec1c2..b7a65c96a5d6a 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -1,5 +1,5 @@ import ast -import sys +import sys, inspect import itertools import tokenize import re @@ -25,6 +25,27 @@ def __init__(self, gbls=None, lcls=None, frame_level=1): finally: del frame + + def update(self, scope_level=None): + + # we are always 2 levels below the caller + # plus the caller may be below the env level + # in which case we need additional levels + sl = 2 + if scope_level is not None: + sl += scope_level + + # add current locals scope + frame = inspect.currentframe() + try: + while(sl>0): + frame = frame.f_back + sl -= 1 + self.locals.update(frame.f_locals) + finally: + del frame + + class ExprParserError(Exception): pass diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 926b9bf9bc509..3efe4bf743a0a 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -91,6 +91,9 @@ def __init__(self, value, env, name=None): self.value = value self.type = type(self.value) + def __unicode__(self): + return com.pprint_thing(self.value) + def _print_operand(opr): return opr.name if is_term(opr) else unicode(opr) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 6be9c67f443cf..853f5897e39f7 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py
@@ -134,6 +134,9 @@ def stringify(value): kind = _ensure_decoded(self.kind) if kind == u'datetime64' or kind == u'datetime': + + if isinstance(v, (int, float)): + raise ValueError("cannot index datelike with an integer/float value") v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') @@ -340,7 +343,7 @@ class Expr(expr.Expr): Parameters ---------- - field : dict, string term expression, or the field to operate (must be a valid index/column type of DataFrame/Panel) + where : string term expression, Expr, or list-like of Exprs queryables : a kinds map (dict of column name -> kind), or None if column is non-indexable encoding : an encoding that will encode the query terms @@ -352,24 +355,31 @@ class Expr(expr.Expr): -------- """ - def __init__(self, expression, queryables=None, encoding=None, lcls=None): - if isinstance(expression, Expr): - expression = str(expression) - self.expr = expression + def __init__(self, where, queryables=None, encoding=None, scope_level=None): + self.encoding = encoding self.condition = None self.filter = None self.terms = None self._visitor = None - # add current locals scope - frame = inspect.currentframe() - try: - if lcls is None: - lcls = dict() - lcls.update(frame.f_back.f_locals) - self.env = Scope(lcls = lcls) - finally: - del frame + # capture the environment if needed + lcls = dict() + if isinstance(where, Expr): + + lcls.update(where.env.locals) + where = str(where) + + elif isinstance(where, (list, tuple)): + + for w in where: + if isinstance(w, Expr): + lcls.update(w.env.locals) + + where = ' & ' .join([ "(%s)" % w for w in where]) + + self.expr = where + self.env = Scope(lcls = lcls) + self.env.update(scope_level) if queryables is not None: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 14d67b9313ff3..2acb61baa0195 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -58,6 +58,21 @@ def _ensure_encoding(encoding): encoding = _default_encoding return encoding +Term = Expr + +def _ensure_term(where): + """ ensure that the where is a Term or a list of Terms + this makes sure that we are capturing the scope of variables + that are passed """ + + # create the terms here with a frame_level=2 (we are 2 levels down) + if isinstance(where, (list, tuple)): + where = [ w if isinstance(w, Term) else Term(w, scope_level=2) for w in where if w is not None ] + elif where is None or isinstance(where, Coordinates): + pass + elif not isinstance(where, Term): + where = Term(where, scope_level=2) + return where class IncompatibilityWarning(Warning): pass @@ -461,6 +476,7 @@ def select(self, key, where=None, start=None, stop=None, columns=None, raise KeyError('No object named %s in the file' % key) # create the storer and axes + where = _ensure_term(where) s = self._create_storer(group) s.infer_axes() @@ -492,6 +508,7 @@ def select_as_coordinates( start : integer (defaults to None), row number to start selection stop : integer (defaults to None), row number to stop selection """ + where = _ensure_term(where) return self.get_storer(key).read_coordinates(where=where, start=start, stop=stop, **kwargs) def unique(self, key, column, **kwargs): @@ -537,6 +554,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, """ # default to single select + where = _ensure_term(where) if isinstance(keys, (list, tuple)) and len(keys) == 1: keys = keys[0] if isinstance(keys, basestring): @@ -640,6 +658,7 @@ def remove(self, key, where=None, start=None, stop=None): raises KeyError if key is not a valid store """ + where =
_ensure_term(where) try: s = self.get_storer(key) except: @@ -3653,8 +3672,6 @@ def _need_convert(kind): return True return False -Term = Expr - class Coordinates(object): """ holds a returned coordinates list, useful to select the same rows from different tables @@ -3714,16 +3731,7 @@ def generate(self, where): if where is None: return None - lcls = dict() - if isinstance(where, (list, tuple)): - for w in where: - if isinstance(w, Term): - lcls.update(w.env.locals) - - where = ' & ' .join([ "(%s)" % w for w in where]) - - queryables = self.table.queryables() - return Expr(where, queryables=queryables, encoding=self.table.encoding, lcls=lcls) + return Expr(where, queryables=self.table.queryables(), encoding=self.table.encoding) def select(self): """ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 76ad477a43ccb..0a8cac5e05b44 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1551,19 +1551,20 @@ def test_terms(self): ["minor=['A', 'B']"], ["index=['20121114']"], ["index=['20121114', '20121114']"], + ['major=20121114'], # passing an integer as the value ] for t in terms: - self.assertRaises(Exception, store.select, 'wp', t) + self.assertRaises(ValueError, store.select, 'wp', t) - self.assertRaises(Exception, Term.__init__) - self.assertRaises(Exception, Term.__init__, 'blah') - self.assertRaises(Exception, Term.__init__, 'index') + self.assertRaises(TypeError, Term.__init__) + self.assertRaises(TypeError, Term.__init__, 'blah') + self.assertRaises(TypeError, Term.__init__, 'index') self.assertRaises(TypeError, Term.__init__, 'index', '==') self.assertRaises(TypeError, Term.__init__, 'index', '>', 5) # panel result = store.select('wp', [Term( - 'major_axis<20000108'), Term("minor_axis=['A', 'B']")]) + 'major_axis<"20000108"'), Term("minor_axis=['A', 'B']")]) expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) @@ -2274,7 +2275,7 @@ def test_panel_select(self): tm.assert_panel_equal(result, expected) result = store.select( - 'wp', ['major_axis>=20000124', ("minor_axis=['A', 'B']")]) + 'wp', ['major_axis>="20000124"', ("minor_axis=['A', 'B']")]) expected = wp.truncate(before='20000124').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) @@ -2290,7 +2291,6 @@ def test_frame_select(self): crit2 = ("columns=['A', 'D']") crit3 = ('columns=A') - import pdb; pdb.set_trace() result = store.select('frame', [crit1, crit2]) expected = df.ix[date:, ['A', 'D']] tm.assert_frame_equal(result, expected) From c5a3c9fdb96e78d039a3d56870fd3c804ba54d45 Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 7 Jul 2013 18:19:29 -0400 Subject: [PATCH 46/48] TST: fixed remaining tests BUG: fixed Attribute ast node in a Call expression BUG: condition with > max_selectors wasn't being handled (in filter) --- pandas/computation/expr.py | 7 +++- pandas/computation/pytables.py | 72 ++++++++++++++++++++++++-------- pandas/io/tests/test_pytables.py | 45 ++++++++------------ 3 files changed, 78 insertions(+), 46 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index b7a65c96a5d6a..5c19f3f1859f9 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -6,13 +6,15 @@ from cStringIO import StringIO from functools import partial - from pandas.core.base import StringMixin from pandas.computation.ops import BinOp, UnaryOp, _reductions, _mathops from pandas.computation.ops import _cmp_ops_syms, _bool_ops_syms from pandas.computation.ops 
import _arith_ops_syms, _unary_ops_syms from pandas.computation.ops import Term, Constant, Value +from pandas import Timestamp +import datetime + class Scope(object): __slots__ = 'globals', 'locals' @@ -25,6 +27,9 @@ def __init__(self, gbls=None, lcls=None, frame_level=1): finally: del frame + # add some useful defaults + self.globals['Timestamp'] = Timestamp + self.globals['datetime'] = datetime def update(self, scope_level=None): diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 853f5897e39f7..31853113d7e34 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -46,6 +46,8 @@ class BinOp(ops.BinOp): + _max_selectors = 31 + def __init__(self, op, lhs, rhs, queryables, encoding): super(BinOp, self).__init__(op, lhs, rhs) self.queryables = queryables @@ -136,7 +138,7 @@ def stringify(value): if kind == u'datetime64' or kind == u'datetime': if isinstance(v, (int, float)): - raise ValueError("cannot index datelike with an integer/float value") + v = stringify(v) v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') @@ -181,12 +183,29 @@ def evaluate(self): if not self.is_valid: raise ValueError("query term is not valid [%s]" % self) - if self.is_in_table: - return None - rhs = self.conform(self.rhs) values = [TermValue(v, v, self.kind) for v in rhs] + if self.is_in_table: + + # if too many values to create the expression, use a filter instead + if self.op in ['==', '!='] and len(values) > self._max_selectors: + + # our filter op expression + if self.op == '!=': + filter_op = lambda axis, vals: not axis.isin(vals) + else: + filter_op = lambda axis, vals: axis.isin(vals) + + self.filter = ( + self.lhs, + filter_op, + Index([v.value for v in values])) + + return self + + return None + # equality conditions if self.op in ['==', '!=']: @@ -219,8 +238,6 @@ class ConditionBinOp(BinOp): - _max_selectors = 31 - def __unicode__(self): return com.pprint_thing("[Condition : [{0}]]".format(self.condition)) @@ -246,12 +263,6 @@ def evaluate(self): # equality conditions if self.op in ['==', '!=']: - # our filter op expression - if self.op == '!=': - filter_op = lambda axis, vals: not axis.isin(vals) - else: - filter_op = lambda axis, vals: axis.isin(vals) - # too many values to create the expression?
if len(values) <= self._max_selectors: vs = [self.generate(v) for v in values] @@ -259,6 +270,7 @@ def evaluate(self): # use a filter after reading else: + return None else: @@ -319,13 +331,33 @@ def visit_Attribute(self, node, **kwargs): raise ValueError("Invalid Attribute context {0}".format(ctx.__name__)) def visit_Call(self, node, **kwargs): - if not isinstance(node.func, ast.Name): + + # this can happen with: datetime.datetime + if isinstance(node.func, ast.Attribute): + res = self.visit_Attribute(node.func) + elif not isinstance(node.func, ast.Name): raise TypeError("Only named functions are supported") + else: + res = self.visit(node.func) - res = self.visit(node.func) if res is None: raise ValueError("Invalid function call {0}".format(node.func.id)) - return res + if hasattr(res,'value'): + res = res.value + + args = [self.visit(targ).value for targ in node.args] + if node.starargs is not None: + args = args + self.visit(node.starargs).value + + keywords = {} + for key in node.keywords: + if not isinstance(key, ast.keyword): + raise ValueError("keyword error in function call '{0}'".format(node.func.id)) + keywords[key.arg] = self.visit(key.value).value + if node.kwargs is not None: + keywords.update(self.visit(node.kwargs).value) + + return Value(res(*args,**keywords),self.env) def visit_Compare(self, node, **kwargs): ops = node.ops @@ -399,8 +431,14 @@ def __unicode__(self): def evaluate(self): """ create and return the numexpr condition and filter """ - self.condition = self.terms.prune(ConditionBinOp) - self.filter = self.terms.prune(FilterBinOp) + try: + self.condition = self.terms.prune(ConditionBinOp) + except AttributeError: + raise ValueError("cannot process node for the condition [{0}]".format(self)) + try: + self.filter = self.terms.prune(FilterBinOp) + except AttributeError: + raise ValueError("cannot process node for the filter [{0}]".format(self)) return self.condition, self.filter diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 0a8cac5e05b44..f02b247826653 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1508,14 +1508,14 @@ def test_remove_crit(self): store.put('wp2', wp, table=True) date1 = wp.major_axis[1:3] - crit1 = Term('major_axis', date1) + crit1 = Term('major_axis=date1') store.remove('wp2', where=[crit1]) result = store.select('wp2') expected = wp.reindex(major_axis=wp.major_axis - date1) tm.assert_panel_equal(result, expected) date2 = wp.major_axis[5] - crit2 = Term('major_axis', date2) + crit2 = Term('major_axis=date2') store.remove('wp2', where=[crit2]) result = store['wp2'] expected = wp.reindex( @@ -1523,7 +1523,7 @@ def test_remove_crit(self): tm.assert_panel_equal(result, expected) date3 = [wp.major_axis[7], wp.major_axis[9]] - crit3 = Term('major_axis', date3) + crit3 = Term('major_axis=date3') store.remove('wp2', where=[crit3]) result = store['wp2'] expected = wp.reindex( @@ -1533,7 +1533,7 @@ def test_remove_crit(self): # corners store.put('wp4', wp, table=True) n = store.remove( - 'wp4', where=[Term('major_axis', '>', wp.major_axis[-1])]) + 'wp4', where=[Term('major_axis>wp.major_axis[-1]')]) result = store.select('wp4') tm.assert_panel_equal(result, wp) @@ -1547,14 +1547,9 @@ def test_terms(self): store.put('p4d', p4d, table=True) # some invalid terms - terms = [ - ["minor=['A', 'B']"], - ["index=['20121114']"], - ["index=['20121114', '20121114']"], - ['major=20121114'], # passing an integer as the value - ] - for t in terms: - self.assertRaises(ValueError, 
store.select, 'wp', t) + self.assertRaises(NameError, store.select, 'wp', "minor=['A', 'B']") + self.assertRaises(NameError, store.select, 'wp', ["index=['20121114']"]) + self.assertRaises(NameError, store.select, 'wp', ["index=['20121114', '20121114']"]) self.assertRaises(TypeError, Term.__init__) self.assertRaises(TypeError, Term.__init__, 'blah') @@ -1569,7 +1564,7 @@ def test_terms(self): tm.assert_panel_equal(result, expected) # p4d - result = store.select('p4d', [Term('major_axis<20000108'), + result = store.select('p4d', [Term('major_axis<"20000108"'), Term("minor_axis=['A', 'B']"), Term("items=['ItemA', 'ItemB']")]) expected = p4d.truncate(after='20000108').reindex( @@ -1588,7 +1583,7 @@ def test_terms(self): ('major_axis=20121114'), ('major_axis>20121114'), (("major_axis=['20121114', '20121114']"),), - ('major_axis', datetime.datetime(2012, 11, 14)), + ('major_axis=datetime.datetime(2012, 11, 14)'), 'major_axis> 20121114', 'major_axis >20121114', 'major_axis > 20121114', @@ -2036,6 +2031,7 @@ def test_select_dtypes(self): df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300))) _maybe_remove(store, 'df') store.append('df', df, data_columns=['ts', 'A']) + result = store.select('df', [Term("ts>=Timestamp('2012-02-01')")]) expected = df[df.ts >= Timestamp('2012-02-01')] tm.assert_frame_equal(expected, result) @@ -2063,7 +2059,7 @@ def test_select_dtypes(self): _maybe_remove(store, 'df_int') store.append('df_int', df) result = store.select( - 'df_int', [Term("index<10"), Term("columns", "=", ["A"])]) + 'df_int', [Term("index<10"), Term("columns=['A']")]) expected = df.reindex(index=list(df.index)[0:10],columns=['A']) tm.assert_frame_equal(expected, result) @@ -2073,7 +2069,7 @@ def test_select_dtypes(self): _maybe_remove(store, 'df_float') store.append('df_float', df) result = store.select( - 'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])]) + 'df_float', [Term("index<10.0"), Term("columns=['A']")]) expected = df.reindex(index=list(df.index)[0:10],columns=['A']) tm.assert_frame_equal(expected, result) @@ -2511,18 +2507,11 @@ def test_select_as_multiple(self): tm.assert_frame_equal(result, expected) # multiple (diff selector) - try: - result = store.select_as_multiple(['df1', 'df2'], where=[Term( - 'index', '>', df2.index[4])], selector='df2') - expected = concat([df1, df2], axis=1) - expected = expected[5:] - tm.assert_frame_equal(result, expected) - except (Exception), detail: - print ("error in select_as_multiple %s" % str(detail)) - print ("store: %s" % store) - print ("df1: %s" % df1) - print ("df2: %s" % df2) - + result = store.select_as_multiple(['df1', 'df2'], where=[Term( + 'index>df2.index[4]')], selector='df2') + expected = concat([df1, df2], axis=1) + expected = expected[5:] + tm.assert_frame_equal(result, expected) # test excpection for diff rows store.append('df3', tm.makeTimeDataFrame(nper=50)) From 71a23a8baa494979ab83b695eab6468e451ccbb4 Mon Sep 17 00:00:00 2001 From: jreback Date: Sun, 7 Jul 2013 20:58:52 -0400 Subject: [PATCH 47/48] BUG: py3 fixes; revise scoping rules to be more broad record variable states from the furthermost part of the stack to the most recent, overwriting if shadow variables occur --- pandas/computation/expr.py | 12 ++++++++---- pandas/computation/pytables.py | 17 ++++++++++------- pandas/io/tests/test_pytables.py | 8 +++----- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 5c19f3f1859f9..1db6a809773f8 100644 --- 
a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -40,17 +40,21 @@ def update(self, scope_level=None): if scope_level is not None: sl += scope_level - # add current locals scope + # add sl frames to the scope starting with the + # most distant and overwriting with more current + # makes sure that we can capture variable scope frame = inspect.currentframe() try: - while(sl>0): + frames = [] + while(sl>=0): frame = frame.f_back sl -= 1 - self.locals.update(frame.f_locals) + frames.append(frame) + for f in frames[::-1]: + self.locals.update(f.f_locals) finally: del frame - class ExprParserError(Exception): pass diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 31853113d7e34..521bf60284107 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -17,7 +17,7 @@ def _ensure_decoded(s): """ if we have bytes, decode them to unicde """ - if isinstance(s, np.bytes_): + if isinstance(s, (np.bytes_, bytes)): s = s.decode('UTF-8') return s @@ -139,6 +139,7 @@ def stringify(value): if isinstance(v, (int, float)): v = stringify(v) + v = _ensure_decoded(v) v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') @@ -295,12 +296,7 @@ class ExprVisitor(expr.ExprVisitor): bin_ops = '>', '<', '>=', '<=', '==', '!=', '&', '|' - bin_op_nodes = ('Gt', 'Lt', 'GtE', 'LtE', 'Eq', 'NotEq', 'BitAnd', 'BitOr') - bin_op_nodes_map = dict(zip(bin_ops, bin_op_nodes)) - - unary_ops = ['~'] - unary_op_nodes = 'Invert' - unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes)) + unary_ops = ['-','~'] def __init__(self, env, **kwargs): for bin_op in self.bin_ops: @@ -369,6 +365,13 @@ def visit_Compare(self, node, **kwargs): def visit_Name(self, node, side=None, **kwargs): return Term(node.id, self.env, side=side) + def visit_UnaryOp(self, node, **kwargs): + if isinstance(node.op, ast.Not): + return UnaryOp(node.op,self.visit(node.operand)) + elif isinstance(node.op, ast.USub): + return Value(-self.visit(node.operand).value,self.env) + self.not_implemented("{0} unary operations".format(node.op)) class Expr(expr.Expr): """ hold a pytables like expression, comprised of possibly multiple 'terms' diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index f02b247826653..4e5b518f187d3 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1551,11 +1551,9 @@ def test_terms(self): self.assertRaises(NameError, store.select, 'wp', ["index=['20121114']"]) self.assertRaises(NameError, store.select, 'wp', ["index=['20121114', '20121114']"]) - self.assertRaises(TypeError, Term.__init__) - self.assertRaises(TypeError, Term.__init__, 'blah') - self.assertRaises(TypeError, Term.__init__, 'index') - self.assertRaises(TypeError, Term.__init__, 'index', '==') - self.assertRaises(TypeError, Term.__init__, 'index', '>', 5) + self.assertRaises(TypeError, Term) + self.assertRaises(TypeError, Term, 'index', '==') + self.assertRaises(TypeError, Term, 'index', '>', 5) From e71276230f13efea46db6b225024ebb0f3e57530 Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 8 Jul 2013 11:35:55 -0400 Subject: [PATCH 48/48] COMPAT: allow prior 0.12 query syntax for terms, e.g.
Term('index','>',5) (and show deprecation warning) --- pandas/computation/expr.py | 1 + pandas/computation/pytables.py | 23 ++++++++++++++++++----- pandas/io/tests/test_pytables.py | 17 ++++++++++++++--- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py index 1db6a809773f8..63a9776bb027c 100644 --- a/pandas/computation/expr.py +++ b/pandas/computation/expr.py @@ -54,6 +54,7 @@ def update(self, scope_level=None): self.locals.update(f.f_locals) finally: del frame + del frames class ExprParserError(Exception): pass diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 521bf60284107..e24445bbd71e9 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -390,7 +390,24 @@ class Expr(expr.Expr): -------- """ - def __init__(self, where, queryables=None, encoding=None, scope_level=None): + def __init__(self, where, op=None, value=None, queryables=None, encoding=None, scope_level=None): + + # try to be back compat + if op is not None: + if not isinstance(where, basestring): + raise TypeError("where must be passed as a string if op/value are passed") + if isinstance(op, Expr): + raise TypeError("invalid op passed, must be a string") + where = "{0}{1}".format(where,op) + if value is not None: + if isinstance(value, Expr): + raise TypeError("invalid value passed, must be a string") + where = "{0}{1}".format(where,value) + + import warnings + warnings.warn("passing multiple values to Expr is deprecated; " + "pass the where as a single string", DeprecationWarning) + self.encoding = encoding self.condition = None self.filter = None @@ -418,10 +435,6 @@ def __init__(self, where, queryables=None, encoding=None, scope_level=None): if queryables is not None: - # if using the old format, this will raise - if not isinstance(queryables, dict): - raise TypeError("Expr must be called with a single-string expression") - self.env.queryables.update(queryables) self._visitor = ExprVisitor(self.env, queryables=queryables, encoding=encoding) self.terms = self.parse() diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 4e5b518f187d3..1ebcae4457bef 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -20,7 +20,6 @@ from pandas import concat, Timestamp from pandas.util import py3compat - try: import tables except ImportError: @@ -1551,9 +1550,14 @@ def test_terms(self): self.assertRaises(NameError, store.select, 'wp', ["index=['20121114']"]) self.assertRaises(NameError, store.select, 'wp', ["index=['20121114', '20121114']"]) + # deprecations + with tm.assert_produces_warning(expected_warning=DeprecationWarning): + Term('index','==') + + with tm.assert_produces_warning(expected_warning=DeprecationWarning): + Term('index', '>', 5) + self.assertRaises(TypeError, Term) - self.assertRaises(TypeError, Term, 'index', '==') - self.assertRaises(TypeError, Term, 'index', '>', 5) # panel result = store.select('wp', [Term( 'major_axis<"20000108"'), Term("minor_axis=['A', 'B']")]) expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) tm.assert_panel_equal(result, expected) + # with deprecation + with tm.assert_produces_warning(expected_warning=DeprecationWarning): + result = store.select('wp', [Term( + 'major_axis','<',"20000108"), Term("minor_axis=['A', 'B']")]) + expected = wp.truncate(after='20000108').reindex(minor=['A', 'B']) + tm.assert_panel_equal(result, expected) + # p4d result = store.select('p4d', [Term('major_axis<"20000108"'),
Term("minor_axis=['A', 'B']"),