WIP: use eval expression parsing as replacement for Term in HDFStore #4155

Closed
wants to merge 48 commits

Commits
89a03be
ENH: add new computation module and toplevel eval function
cpcloud Jun 16, 2013
bcd17b0
ENH/TST: add new instance testing functions and their tests
cpcloud Jun 16, 2013
81bacd1
BUG: prevent certain index types from joining with DatetimeIndex
cpcloud Jun 16, 2013
e380271
TST/ENH: add 2d bare numpy array and nan support
cpcloud Jun 16, 2013
99a3d28
ENH: add modulus support
cpcloud Jun 17, 2013
4db95fe
TST: add failing modulus tests
cpcloud Jun 17, 2013
6000c89
CLN: use format string for unicode
cpcloud Jun 18, 2013
c25a1d4
CLN: remove engine detection and manip for datetimes
cpcloud Jun 18, 2013
1132bc4
CLN/ENH: add new interface to encapsulate Terms and Constants
cpcloud Jun 20, 2013
54f1897
ENH: allow an already-parsed expression to be passed to eval
cpcloud Jun 20, 2013
e20900a
CLN: add automatic scope creating object
cpcloud Jun 26, 2013
51d80f6
CLN: make the environment an implementation detail
cpcloud Jun 28, 2013
038d79c
DOC: add docstring to eval
cpcloud Jun 28, 2013
599cf32
CLN: cleanup pytables.py a bit
cpcloud Jun 28, 2013
ea769e6
CLN: clean up engines
cpcloud Jun 29, 2013
ff78c08
CLN: clean up eval and have the Scope instance auto create the scope …
cpcloud Jul 4, 2013
f9f7fd7
CLN: add six.string_types checking instead of basestring
cpcloud Jul 4, 2013
48eff13
TST: clean up some tests, add minor assertions where none existed
cpcloud Jul 4, 2013
d87f027
CLN: clean up frame.py a bit
cpcloud Jul 4, 2013
5b58a08
CLN: clean up pytables arguments a bit
cpcloud Jul 4, 2013
7482a27
CLN: use shiny new string mixin to refactor repring
cpcloud Jul 4, 2013
0d40fe1
CLN: move align to its own file
cpcloud Jul 4, 2013
87957d2
CLN: clean up and use new stringmixin for Expr
cpcloud Jul 4, 2013
e35cb5c
ENH/CLN: be more careful about unicode
cpcloud Jul 4, 2013
1ceec39
CLN: run autopep8 on pandas/io/pytables.py
cpcloud Jul 4, 2013
c665a85
DOC: reference future enhancingperf.eval section
cpcloud Jul 4, 2013
cb27934
CLN/DOC: clean up docstrings in pytables
cpcloud Jul 4, 2013
63ba37d
CLN: actually pass fletcher32 in get_store
cpcloud Jul 4, 2013
dcde590
CLN: remove unused variables
cpcloud Jul 4, 2013
3c4e2b3
CLN: more pep8 and get rid of most raise Exception clauses
cpcloud Jul 4, 2013
226c786
CLN: change NameError to match python
cpcloud Jul 4, 2013
79871d8
API: expose the Expr object to top level pandas
cpcloud Jul 5, 2013
84fdb45
CLN/TST: fail with a NotImplementedError on and or not
cpcloud Jul 5, 2013
4d9f9a7
CLN: generlize operator/expression printing
cpcloud Jul 5, 2013
a0d2ce0
CLN: clean up testing and expr
cpcloud Jul 5, 2013
317a153
ENH: add modest type inference
cpcloud Jul 6, 2013
401bc28
ENH: rewrite assignment as equal comparison
cpcloud Jul 6, 2013
22dedcb
ENH: initial commit for adding Expr based terms for pytables support
jreback Jul 6, 2013
441285c
WIP: still some debugging statements in
jreback Jul 7, 2013
05a005f
WIP: conditions working now, filtering still only ok
jreback Jul 7, 2013
22b4a93
TST: more test changes
jreback Jul 7, 2013
ca292c2
BUG: added HDFStore to inherit from Stringmixin
jreback Jul 7, 2013
dfef617
BUG: process visit_Index
jreback Jul 7, 2013
b168fb3
ENH: use non_implemented function call in ExprVisitor
jreback Jul 7, 2013
5fac749
BUG: fixed scoping issues by _ensure_term at the top-level
jreback Jul 7, 2013
c5a3c9f
TST: fixed remaining tests
jreback Jul 7, 2013
71a23a8
BUG: py3 fixes; revise scoping rules to be more broad
jreback Jul 8, 2013
e712762
COMPAT: allow prior 0.12 query syntax for terms, e.g. Term('index','>…
jreback Jul 8, 2013
1 change: 1 addition & 0 deletions pandas/__init__.py
@@ -29,6 +29,7 @@
from pandas.stats.api import *
from pandas.tseries.api import *
from pandas.io.api import *
from pandas.computation.api import *

from pandas.util.testing import debug

Empty file added pandas/computation/__init__.py
220 changes: 220 additions & 0 deletions pandas/computation/align.py
@@ -0,0 +1,220 @@
from functools import partial, wraps
from itertools import izip

import numpy as np

import pandas as pd
import pandas.core.common as com
from pandas.computation.ops import is_const
from pandas.computation.common import flatten


def _align_core_single_unary_op(term):
if isinstance(term.value, np.ndarray) and not com.is_series(term.value):
typ = partial(np.asanyarray, dtype=term.value.dtype)
else:
typ = type(term.value)
ret = typ,

if not hasattr(term.value, 'axes'):
ret += None,
else:
ret += _zip_axes_from_type(typ, term.value.axes),
return ret


def _zip_axes_from_type(typ, new_axes):
axes = {}
for ax_ind, ax_name in typ._AXIS_NAMES.iteritems():
axes[ax_name] = new_axes[ax_ind]
return axes


def _maybe_promote_shape(values, naxes):
# test to see if we have an array else leave since must be a number
if not isinstance(values, np.ndarray):
return values

ndims = values.ndim
if ndims > naxes:
raise AssertionError('cannot have more dims than axes, '
'{0} > {1}'.format(ndims, naxes))
if ndims == naxes:
return values

ndim = set(xrange(ndims))
nax = set(xrange(naxes))

axes_slice = [slice(None)] * naxes

# symmetric difference of numaxes and ndims
slices = nax - ndim

if ndims == naxes:
if slices:
raise AssertionError('slices should be empty if ndims == naxes '
'{0}'.format(slices))
else:
if not slices:
raise AssertionError('slices should NOT be empty if ndim != naxes '
'{0}'.format(slices))

for sl in slices:
axes_slice[sl] = np.newaxis

return values[tuple(axes_slice)]


def _any_pandas_objects(terms):
"""Check a sequence of terms for instances of PandasObject."""
return any(com.is_pd_obj(term.value) for term in terms)


def _filter_special_cases(f):
@wraps(f)
def wrapper(terms):
# single unary operand
if len(terms) == 1:
return _align_core_single_unary_op(terms[0])

# only scalars
elif all(term.isscalar for term in terms):
return np.result_type(*(term.value for term in terms)), None

# single element ndarrays
all_has_size = all(hasattr(term.value, 'size') for term in terms)
if (all_has_size and all(term.value.size == 1 for term in terms)):
return np.result_type(*(term.value for term in terms)), None

# no pandas so just punt to the evaluator
if not _any_pandas_objects(terms):
return np.result_type(*(term.value for term in terms)), None

return f(terms)
return wrapper


@_filter_special_cases
def _align_core(terms):
term_index = [i for i, term in enumerate(terms) if hasattr(term.value,
'axes')]
term_dims = [terms[i].value.ndim for i in term_index]
ndims = pd.Series(dict(zip(term_index, term_dims)))

# initial axes are the axes of the largest-axis'd term
biggest = terms[ndims.idxmax()].value
typ = biggest._constructor
axes = biggest.axes
naxes = len(axes)

for term in (terms[i] for i in term_index):
for axis, items in enumerate(term.value.axes):
if com.is_series(term.value) and naxes > 1:
ax, itm = naxes - 1, term.value.index
else:
ax, itm = axis, items
axes[ax] = axes[ax].join(itm, how='outer')

for i, ndim in ndims.iteritems():
for axis, items in izip(xrange(ndim), axes):
ti = terms[i].value

if hasattr(ti, 'reindex_axis'):
transpose = com.is_series(ti) and naxes > 1

if transpose:
f = partial(ti.reindex, index=axes[naxes - 1], copy=False)
else:
f = partial(ti.reindex_axis, items, axis=axis, copy=False)

if pd.lib.is_bool_array(ti.values):
r = f(fill_value=True)
else:
r = f()

terms[i].update(r)

res = _maybe_promote_shape(terms[i].value.T if transpose else
terms[i].value, naxes)
res = res.T if transpose else res

try:
v = res.values
except AttributeError:
v = res
terms[i].update(v)

return typ, _zip_axes_from_type(typ, axes)


def _filter_terms(flat):
# numeric literals
literals = set(filter(is_const, flat))

# these are strings which are variable names
names = set(flat) - literals

# literals are not names and names are not literals, so intersection should
# be empty
if literals & names:
raise ValueError('literals cannot be names and names cannot be '
'literals')
return names, literals


def _align(terms, env):

# flatten the parse tree (a nested list)
terms = list(flatten(terms))

# separate names and literals
names, literals = _filter_terms(terms)

if not names: # only literals so just promote to a common type
return np.result_type(*literals).type, None

# if all resolved variables are numeric scalars
if all(term.isscalar for term in terms):
return np.result_type(*(term.value for term in terms)).type, None

# perform the main alignment
typ, axes = _align_core(terms)
return typ, axes


def _reconstruct_object(typ, obj, axes, dtype):
"""Reconstruct an object given its type, raw value, and possibly empty
(None) axes.

Parameters
----------
typ : object
A type
obj : object
The value to use in the type constructor
axes : dict
The axes to use to construct the resulting pandas object

Returns
-------
reconst : typ
An object of type ``typ`` with the value `obj` and possible axes
`axes`.
"""
#import ipdb; ipdb.set_trace()
try:
typ = typ.type
except AttributeError:
pass

if (not isinstance(typ, partial) and
issubclass(typ, pd.core.generic.PandasObject)):
return typ(obj, dtype=dtype, **axes)

ret_value = typ(obj).astype(dtype)

try:
ret = ret_value.item()
except ValueError:
ret = ret_value
return ret
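
For orientation, here is a minimal sketch of what the outer-join alignment in _align_core amounts to, written against public pandas calls only; the frames df1 and df2 are hypothetical and this is not the module's own code path.

import numpy as np
import pandas as pd

# Two hypothetical operands whose indexes only partially overlap.
df1 = pd.DataFrame({'x': [1.0, 2.0, 3.0]}, index=['a', 'b', 'c'])
df2 = pd.DataFrame({'x': [10.0, 20.0, 30.0]}, index=['b', 'c', 'd'])

# Each axis of every pandas operand is outer-joined ...
joined = df1.index.join(df2.index, how='outer')

# ... and the operands are reindexed onto the joined labels, so the raw
# ndarrays handed to the evaluator line up element-wise (labels missing
# from one operand become NaN).
lhs = df1.reindex(joined)
rhs = df2.reindex(joined)

result = pd.DataFrame(lhs.values + rhs.values, index=joined,
                      columns=df1.columns)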
2 changes: 2 additions & 0 deletions pandas/computation/api.py
@@ -0,0 +1,2 @@
from pandas.computation.eval import eval
from pandas.computation.expr import Expr
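
A hedged usage sketch of the two names exported here. The expression string is evaluated against objects visible in the calling scope; the engine keyword is an assumption inferred from the 'numexpr'/'python' registry in engines.py below, not something this two-line diff itself shows.

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(10, 2), columns=['a', 'b'])

# Evaluate an expression string; 'df' is picked up from the local scope.
res = pd.eval('df * 2 + df')

# Assumed keyword: fall back to the pure-Python/NumPy engine instead of
# numexpr (the registered engine names come from engines.py).
res_py = pd.eval('df * 2 + df', engine='python')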
11 changes: 11 additions & 0 deletions pandas/computation/common.py
@@ -0,0 +1,11 @@
import collections
from pandas.core.common import is_string


def flatten(l):
for el in l:
if isinstance(el, collections.Iterable) and not is_string(el):
for s in flatten(el):
yield s
else:
yield el
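
For illustration, how the generator behaves on a nested, parse-tree-like input. Strings are yielded whole because the is_string check short-circuits the Iterable branch (a sketch, assuming is_string returns True for plain str objects).

from pandas.computation.common import flatten

nested = [1, [2, [3, 'abc']], (4, 5)]
flat = list(flatten(nested))
# flat == [1, 2, 3, 'abc', 4, 5]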
79 changes: 79 additions & 0 deletions pandas/computation/engines.py
@@ -0,0 +1,79 @@
import abc

from pandas.computation.align import _align, _reconstruct_object


class AbstractEngine(object):
""""""
__metaclass__ = abc.ABCMeta

has_neg_frac = False

def __init__(self, expr):
self.expr = expr
self.aligned_axes = None
self.result_type = None

@abc.abstractmethod
def convert(self):
"""Convert an expression for evaluation."""
pass

def evaluate(self):
if not self._is_aligned:
self.result_type, self.aligned_axes = _align(self.expr.terms,
self.expr.env)

res = self._evaluate(self.expr.env)
return _reconstruct_object(self.result_type, res, self.aligned_axes,
self.expr.terms.return_type)

@property
def _is_aligned(self):
return self.aligned_axes is not None and self.result_type is not None

@abc.abstractmethod
def _evaluate(self, env):
"""Return an evaluated expression."""
pass


class NumExprEngine(AbstractEngine):
"""NumExpr engine class"""
has_neg_frac = True

def __init__(self, expr):
super(NumExprEngine, self).__init__(expr)

def convert(self):
"""Return a string"""
return '%s' % self.expr

def _evaluate(self, env):
import numexpr as ne

try:
return ne.evaluate(self.convert(), local_dict=env.locals,
global_dict=env.globals,
truediv=self.expr.truediv)
except KeyError as e:
raise NameError('{0!r} is not defined'.format(e.message))


class PythonEngine(AbstractEngine):
"""Use NumPy even if numexpr is installed"""
has_neg_frac = False

def __init__(self, expr):
super(PythonEngine, self).__init__(expr)

def convert(self):
pass

def evaluate(self):
return self.expr(self.expr.env)

def _evaluate(self, env):
pass

_engines = {'numexpr': NumExprEngine, 'python': PythonEngine }
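
A sketch of how the registry above might be driven from the eval layer. The Expr constructor call is hypothetical (its signature is not part of this file); per AbstractEngine, whatever is passed in must expose .terms, .env, and .truediv.

from pandas.computation.engines import _engines
from pandas.computation.expr import Expr

parsed = Expr('df * 2 + df')       # hypothetical constructor call
engine_cls = _engines['numexpr']   # or 'python' for the NumPy fallback
engine = engine_cls(parsed)

# evaluate() aligns the expression's terms, runs the backend, and
# reconstructs a pandas object of the aligned type.
result = engine.evaluate()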