WIP: Eval with some PyTables support #4151

Closed
wants to merge 39 commits

Commits
a36a63e
ENH: add new computation module and toplevel eval function
cpcloud Jun 16, 2013
e944f87
ENH/TST: add new instance testing functions and their tests
cpcloud Jun 16, 2013
6f511b4
BUG: prevent certain index types from joining with DatetimeIndex
cpcloud Jun 16, 2013
1a1e436
TST/ENH: add 2d bare numpy array and nan support
cpcloud Jun 16, 2013
f670c12
ENH: add modulus support
cpcloud Jun 17, 2013
60e52b2
TST: add failing modulus tests
cpcloud Jun 17, 2013
24a2272
CLN: use format string for unicode
cpcloud Jun 18, 2013
f1ca19f
CLN: remove engine detection and manip for datetimes
cpcloud Jun 18, 2013
ac3c3f7
CLN/ENH: add new interface to encapsulate Terms and Constants
cpcloud Jun 20, 2013
51a2edc
ENH: allow an already-parsed expression to be passed to eval
cpcloud Jun 20, 2013
8ace9cd
CLN: add automatic scope creating object
cpcloud Jun 26, 2013
ecf9a6f
CLN: make the environment an implementation detail
cpcloud Jun 28, 2013
0b7ed5e
DOC: add docstring to eval
cpcloud Jun 28, 2013
8a55e5c
CLN: cleanup pytables.py a bit
cpcloud Jun 28, 2013
0c5cd36
CLN: clean up engines
cpcloud Jun 29, 2013
49b9070
CLN: clean up eval and have the Scope instance auto create the scope …
cpcloud Jul 4, 2013
0396fcc
CLN: add six.string_types checking instead of basestring
cpcloud Jul 4, 2013
d8833ac
TST: clean up some tests, add minor assertions where none existed
cpcloud Jul 4, 2013
a589c1d
CLN: clean up frame.py a bit
cpcloud Jul 4, 2013
f762e32
CLN: clean up pytables arguments a bit
cpcloud Jul 4, 2013
4aa8779
CLN: use shiny new string mixin to refactor repring
cpcloud Jul 4, 2013
56ad533
CLN: move align to its own file
cpcloud Jul 4, 2013
5eab1ee
CLN: clean up and use new stringmixin for Expr
cpcloud Jul 4, 2013
0d514ee
ENH/CLN: be more careful about unicode
cpcloud Jul 4, 2013
82042e4
CLN: run autopep8 on pandas/io/pytables.py
cpcloud Jul 4, 2013
3d47ee6
DOC: reference future enhancingperf.eval section
cpcloud Jul 4, 2013
0b3f18b
CLN/DOC: clean up docstrings in pytables
cpcloud Jul 4, 2013
f60009b
CLN: actually pass fletcher32 in get_store
cpcloud Jul 4, 2013
9d6f7ef
CLN: remove unused variables
cpcloud Jul 4, 2013
998b0b1
CLN: more pep8 and get rid of most raise Exception clauses
cpcloud Jul 4, 2013
5c4d6a1
CLN: change NameError to match python
cpcloud Jul 4, 2013
a0c79ae
API: expose the Expr object to top level pandas
cpcloud Jul 5, 2013
4f8055a
CLN/TST: fail with a NotImplementedError on and or not
cpcloud Jul 5, 2013
9a29eb6
CLN: generalize operator/expression printing
cpcloud Jul 5, 2013
663fed5
CLN: clean up testing and expr
cpcloud Jul 5, 2013
995ad57
Merge branch 'eval-3393' of https://github.com/cpcloud/pandas into cp…
jreback Jul 6, 2013
fbeb99d
ENH: initial commit for adding Expr based terms for pytables support
jreback Jul 6, 2013
6b67214
WIP: still some debugging statements in
jreback Jul 7, 2013
c4143a1
WIP: conditions working now, filtering still only ok
jreback Jul 7, 2013
1 change: 1 addition & 0 deletions pandas/__init__.py
@@ -29,6 +29,7 @@
from pandas.stats.api import *
from pandas.tseries.api import *
from pandas.io.api import *
from pandas.computation.api import *

from pandas.util.testing import debug

Empty file added pandas/computation/__init__.py
Empty file.
219 changes: 219 additions & 0 deletions pandas/computation/align.py
@@ -0,0 +1,219 @@
from functools import partial, wraps
from itertools import izip

import numpy as np

import pandas as pd
import pandas.core.common as com
from pandas.computation.ops import is_const
from pandas.computation.common import flatten


def _align_core_single_unary_op(term):
if isinstance(term.value, np.ndarray) and not com.is_series(term.value):
typ = partial(np.asanyarray, dtype=term.value.dtype)
else:
typ = type(term.value)
ret = typ,

if not hasattr(term.value, 'axes'):
ret += None,
else:
ret += _zip_axes_from_type(typ, term.value.axes),
return ret


def _zip_axes_from_type(typ, new_axes):
axes = {}
for ax_ind, ax_name in typ._AXIS_NAMES.iteritems():
axes[ax_name] = new_axes[ax_ind]
return axes


def _maybe_promote_shape(values, naxes):
# test to see if we have an array else leave since must be a number
if not isinstance(values, np.ndarray):
return values

ndims = values.ndim
if ndims > naxes:
raise AssertionError('cannot have more dims than axes, '
'{0} > {1}'.format(ndims, naxes))
if ndims == naxes:
return values

ndim = set(xrange(ndims))
nax = set(xrange(naxes))

axes_slice = [slice(None)] * naxes

# symmetric difference of numaxes and ndims
slices = nax - ndim

if ndims == naxes:
if slices:
raise AssertionError('slices should be empty if ndims == naxes '
'{0}'.format(slices))
else:
if not slices:
raise AssertionError('slices should NOT be empty if ndim != naxes '
'{0}'.format(slices))

for sl in slices:
axes_slice[sl] = np.newaxis

return values[tuple(axes_slice)]


def _any_pandas_objects(terms):
"""Check a sequence of terms for instances of PandasObject."""
return any(com.is_pd_obj(term.value) for term in terms)


def _filter_special_cases(f):
@wraps(f)
def wrapper(terms):
# single unary operand
if len(terms) == 1:
return _align_core_single_unary_op(terms[0])

# only scalars
elif all(term.isscalar for term in terms):
return np.result_type(*(term.value for term in terms)), None

# single element ndarrays
all_has_size = all(hasattr(term.value, 'size') for term in terms)
if (all_has_size and all(term.value.size == 1 for term in terms)):
return np.result_type(*(term.value for term in terms)), None

# no pandas so just punt to the evaluator
if not _any_pandas_objects(terms):
return np.result_type(*(term.value for term in terms)), None

return f(terms)
return wrapper


@_filter_special_cases
def _align_core(terms):
term_index = [i for i, term in enumerate(terms) if hasattr(term.value,
'axes')]
term_dims = [terms[i].value.ndim for i in term_index]
ndims = pd.Series(dict(zip(term_index, term_dims)))

# initial axes are the axes of the largest-axis'd term
biggest = terms[ndims.idxmax()].value
typ = biggest._constructor
axes = biggest.axes
naxes = len(axes)

for term in (terms[i] for i in term_index):
for axis, items in enumerate(term.value.axes):
if com.is_series(term.value) and naxes > 1:
ax, itm = naxes - 1, term.value.index
else:
ax, itm = axis, items
axes[ax] = axes[ax].join(itm, how='outer')

for i, ndim in ndims.iteritems():
for axis, items in izip(xrange(ndim), axes):
ti = terms[i].value

if hasattr(ti, 'reindex_axis'):
transpose = com.is_series(ti) and naxes > 1

if transpose:
f = partial(ti.reindex, index=axes[naxes - 1], copy=False)
else:
f = partial(ti.reindex_axis, items, axis=axis, copy=False)

if pd.lib.is_bool_array(ti.values):
r = f(fill_value=True)
else:
r = f()

terms[i].update(r)

res = _maybe_promote_shape(terms[i].value.T if transpose else
terms[i].value, naxes)
res = res.T if transpose else res

try:
v = res.values
except AttributeError:
v = res
terms[i].update(v)

return typ, _zip_axes_from_type(typ, axes)


def _filter_terms(flat):
# numeric literals
literals = set(filter(is_const, flat))

# these are strings which are variable names
names = set(flat) - literals

# literals are not names and names are not literals, so intersection should
# be empty
if literals & names:
raise ValueError('literals cannot be names and names cannot be '
'literals')
return names, literals


def _align(terms, env):
# flatten the parse tree (a nested list)
terms = list(flatten(terms))

# separate names and literals
names, literals = _filter_terms(terms)

if not names: # only literals so just promote to a common type
return np.result_type(*literals).type, None

# if all resolved variables are numeric scalars
if all(term.isscalar for term in terms):
return np.result_type(*(term.value for term in terms)).type, None

# perform the main alignment
typ, axes = _align_core(terms)
return typ, axes


def _reconstruct_object(typ, obj, axes):
"""Reconstruct an object given its type, raw value, and possibly empty
(None) axes.

Parameters
----------
typ : object
A type
obj : object
The value to use in the type constructor
axes : dict
The axes to use to construct the resulting pandas object

Returns
-------
reconst : typ
An object of type ``typ`` with the value `obj` and possible axes
`axes`.
"""
try:
# handle numpy dtypes
typ = typ.type
except AttributeError:
pass

if (not isinstance(typ, partial) and
issubclass(typ, pd.core.generic.PandasObject)):
return typ(obj, **axes)

ret_value = typ(obj)

try:
return ret_value.item()
except (AttributeError, ValueError):
return ret_value
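
For reference, the net effect of the alignment code above is the same outer-join alignment pandas applies in ordinary arithmetic: the axes of the participating objects are joined with how='outer' and the reindexed values are handed to the engine. A minimal sketch of that behaviour through the top-level eval added in this PR (assuming the automatic scope creation from commit 49b9070 picks up local variables):

import pandas as pd

a = pd.Series([1.0, 2.0, 3.0], index=['x', 'y', 'z'])
b = pd.Series([10.0, 20.0], index=['y', 'z'])

# the indexes are joined with an outer join, so the result covers
# ['x', 'y', 'z'] and is NaN at 'x' where b has no value -- the same
# answer as the plain expression a + b
res = pd.eval('a + b')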

2 changes: 2 additions & 0 deletions pandas/computation/api.py
@@ -0,0 +1,2 @@
from pandas.computation.eval import eval
from pandas.computation.expr import Expr
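
These two imports are what expose the new machinery at the top level, so pd.eval and pd.Expr become available after the star import added to pandas/__init__.py above. A hypothetical usage sketch (the exact Expr constructor arguments live in pandas/computation/expr.py, which is not part of this hunk):

import pandas as pd

df1 = pd.DataFrame({'a': range(5)})
df2 = pd.DataFrame({'a': range(5, 10)})

# evaluate an arithmetic expression over the two frames; engine selection
# ('numexpr' vs 'python') is handled inside pd.eval
result = pd.eval('df1 + df2')

# per commit 51a2edc, an already-parsed expression (a pd.Expr instance)
# can also be passed to eval in place of a string
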
11 changes: 11 additions & 0 deletions pandas/computation/common.py
@@ -0,0 +1,11 @@
import collections
from pandas.core.common import is_string


def flatten(l):
for el in l:
if isinstance(el, collections.Iterable) and not is_string(el):
for s in flatten(el):
yield s
else:
yield el
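
flatten linearises an arbitrarily nested iterable depth-first while treating strings as leaves, which is how the nested parse tree of terms is turned into a flat list before alignment. For example:

list(flatten([1, [2, [3, 'abc']], 4]))
# -> [1, 2, 3, 'abc', 4]
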
78 changes: 78 additions & 0 deletions pandas/computation/engines.py
@@ -0,0 +1,78 @@
import abc

from pandas.computation.align import _align, _reconstruct_object


class AbstractEngine(object):
""""""
__metaclass__ = abc.ABCMeta

has_neg_frac = False

def __init__(self, expr):
self.expr = expr
self.aligned_axes = None
self.result_type = None

@abc.abstractmethod
def convert(self):
"""Convert an expression for evaluation."""
pass

def evaluate(self):
if not self._is_aligned:
self.result_type, self.aligned_axes = _align(self.expr.terms,
self.expr.env)

res = self._evaluate(self.expr.env)
return _reconstruct_object(self.result_type, res, self.aligned_axes)

@property
def _is_aligned(self):
return self.aligned_axes is not None and self.result_type is not None

@abc.abstractmethod
def _evaluate(self, env):
"""Return an evaluated expression."""
pass


class NumExprEngine(AbstractEngine):
"""NumExpr engine class"""
has_neg_frac = True

def __init__(self, expr):
super(NumExprEngine, self).__init__(expr)

def convert(self):
"""Return a string"""
return '%s' % self.expr

def _evaluate(self, env):
import numexpr as ne

try:
return ne.evaluate(self.convert(), local_dict=env.locals,
global_dict=env.globals,
truediv=self.expr.truediv)
except KeyError as e:
raise NameError('{0!r} is not defined'.format(e.message))


class PythonEngine(AbstractEngine):
"""Use NumPy even if numexpr is installed"""
has_neg_frac = False

def __init__(self, expr):
super(PythonEngine, self).__init__(expr)

def convert(self):
pass

def evaluate(self):
return self.expr(self.expr.env)

def _evaluate(self, env):
pass

_engines = {'numexpr': NumExprEngine, 'python': PythonEngine }
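
The _engines dict maps engine names to classes; eval (defined in pandas/computation/eval.py, not shown in this hunk) presumably looks the class up by name, wraps the parsed expression and calls evaluate. A rough, illustrative-only sketch of that dispatch:

def _evaluate_with_engine(expr, engine='numexpr'):
    # look up the registered engine class and run the expression;
    # expr is assumed to be an already-constructed Expr instance
    try:
        eng_class = _engines[engine]
    except KeyError:
        raise KeyError('unknown engine %r, valid engines are %s'
                       % (engine, list(_engines)))
    return eng_class(expr).evaluate()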