Skip to content

Commit 995ad57

Browse files
committed
Merge branch 'eval-3393' of https://github.com/cpcloud/pandas into cpcloud-eval-3393
2 parents a488478 + 663fed5 commit 995ad57

26 files changed

+2341
-605
lines changed

pandas/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from pandas.stats.api import *
3030
from pandas.tseries.api import *
3131
from pandas.io.api import *
32+
from pandas.computation.api import *
3233

3334
from pandas.util.testing import debug
3435

pandas/computation/__init__.py

Whitespace-only changes.

pandas/computation/align.py

+219
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
from functools import partial, wraps
2+
from itertools import izip
3+
4+
import numpy as np
5+
6+
import pandas as pd
7+
import pandas.core.common as com
8+
from pandas.computation.ops import is_const
9+
from pandas.computation.common import flatten
10+
11+
12+
def _align_core_single_unary_op(term):
    """Compute the result constructor and axes for a single-term expression.

    Parameters
    ----------
    term : object
        An object exposing the operand through its ``value`` attribute.

    Returns
    -------
    typ, axes : tuple
        ``typ`` is ``np.asanyarray`` bound to the operand's dtype when the
        operand is an ndarray that is not a Series, otherwise the operand's
        type. ``axes`` is ``None`` when the operand has no ``axes``
        attribute, otherwise the mapping built by ``_zip_axes_from_type``.
    """
    value = term.value

    if isinstance(value, np.ndarray) and not com.is_series(value):
        # plain arrays are rebuilt through asanyarray with the same dtype
        typ = partial(np.asanyarray, dtype=value.dtype)
    else:
        typ = type(value)

    if hasattr(value, 'axes'):
        axes = _zip_axes_from_type(typ, value.axes)
    else:
        axes = None

    return typ, axes
24+
25+
26+
def _zip_axes_from_type(typ, new_axes):
    """Map the axis names declared by ``typ`` onto the given axes.

    Parameters
    ----------
    typ : object
        Anything with an ``_AXIS_NAMES`` mapping of axis position to axis
        name.
    new_axes : sequence
        Axes, indexable by the positions found in ``typ._AXIS_NAMES``.

    Returns
    -------
    axes : dict
        ``{axis name: new_axes[axis position]}``
    """
    return dict((name, new_axes[position])
                for position, name in typ._AXIS_NAMES.iteritems())
31+
32+
33+
def _maybe_promote_shape(values, naxes):
    """Append length-1 axes to an array until it has ``naxes`` dimensions.

    Parameters
    ----------
    values : object
        The value to promote. Anything that is not an ``np.ndarray`` is
        returned untouched.
    naxes : int
        The number of dimensions the result must have.

    Returns
    -------
    promoted : object
        ``values`` itself when it is not an array or already has ``naxes``
        dimensions, otherwise ``values`` indexed so that every existing axis
        is kept and one new trailing axis is added per missing dimension.

    Raises
    ------
    AssertionError
        If ``values`` has more dimensions than ``naxes``.
    """
    # test to see if we have an array else leave since must be a number
    if not isinstance(values, np.ndarray):
        return values

    ndims = values.ndim
    if ndims > naxes:
        raise AssertionError('cannot have more dims than axes, '
                             '{0} > {1}'.format(ndims, naxes))
    if ndims == naxes:
        return values

    # from here on ndims < naxes: keep the existing axes in place and add a
    # new axis for each of the trailing positions ndims .. naxes - 1
    indexer = (slice(None),) * ndims + (np.newaxis,) * (naxes - ndims)
    return values[indexer]
66+
67+
68+
def _any_pandas_objects(terms):
    """Return True if the value of any term is a pandas object.

    Each term's ``value`` is tested with ``com.is_pd_obj``; the scan stops at
    the first match.
    """
    for term in terms:
        if com.is_pd_obj(term.value):
            return True
    return False
71+
72+
73+
def _filter_special_cases(f):
    """Decorate an alignment function so trivial inputs bypass it.

    The wrapped function is only called when none of the shortcuts apply.
    The shortcuts, checked in this order, are:

    1. a single term, which is handled by ``_align_core_single_unary_op``
    2. every term is a scalar
    3. every term's value has a ``size`` attribute equal to 1
    4. no term holds a pandas object

    Cases 2-4 return ``(np.result_type(*values), None)``.
    """
    @wraps(f)
    def wrapper(terms):
        def numpy_result():
            # common dtype of all operands, with no axes to align
            operands = [term.value for term in terms]
            return np.result_type(*operands), None

        # single unary operand
        if len(terms) == 1:
            return _align_core_single_unary_op(terms[0])

        # only scalars
        if all(term.isscalar for term in terms):
            return numpy_result()

        # single element ndarrays
        every_value_sized = all(hasattr(term.value, 'size') for term in terms)
        if every_value_sized and all(term.value.size == 1 for term in terms):
            return numpy_result()

        # no pandas so just punt to the evaluator
        if not _any_pandas_objects(terms):
            return numpy_result()

        return f(terms)
    return wrapper
95+
96+
97+
@_filter_special_cases
def _align_core(terms):
    """Align every term that has axes onto a common set of outer-joined axes.

    Terms are modified in place through their ``update`` method. Terms
    without an ``axes`` attribute are left alone.

    Parameters
    ----------
    terms : sequence
        Objects exposing the operand as ``value`` and accepting a
        replacement through ``update``.

    Returns
    -------
    typ, axes : tuple
        ``typ`` is the ``_constructor`` of the term with the most dimensions
        and ``axes`` is the mapping produced by ``_zip_axes_from_type`` for
        the joined axes.
    """
    # NOTE(review): the nesting below was reconstructed from a flattened diff
    # view; confirm against the repository that the promotion step belongs to
    # the outer per-term loop rather than the inner per-axis loop.

    # positions of the terms that carry axes, and their dimensionality
    term_index = [i for i, term in enumerate(terms) if hasattr(term.value,
                                                               'axes')]
    term_dims = [terms[i].value.ndim for i in term_index]
    ndims = pd.Series(dict(zip(term_index, term_dims)))

    # initial axes are the axes of the largest-axis'd term
    biggest = terms[ndims.idxmax()].value
    typ = biggest._constructor
    axes = biggest.axes
    naxes = len(axes)

    # outer-join the axes of every term into ``axes`` (mutated in place)
    for term in (terms[i] for i in term_index):
        for axis, items in enumerate(term.value.axes):
            if com.is_series(term.value) and naxes > 1:
                # a Series combined with a higher-dimensional object has its
                # index joined against the LAST axis of the result
                ax, itm = naxes - 1, term.value.index
            else:
                ax, itm = axis, items
            axes[ax] = axes[ax].join(itm, how='outer')

    # reindex each term to the joined axes
    for i, ndim in ndims.iteritems():
        for axis, items in izip(xrange(ndim), axes):
            ti = terms[i].value

            if hasattr(ti, 'reindex_axis'):
                # NOTE(review): ``transpose`` is only bound in this branch
                # but is read again after the inner loop -- presumably every
                # term with axes also has ``reindex_axis``; confirm.
                transpose = com.is_series(ti) and naxes > 1

                if transpose:
                    f = partial(ti.reindex, index=axes[naxes - 1], copy=False)
                else:
                    f = partial(ti.reindex_axis, items, axis=axis, copy=False)

                # boolean data is filled with True where reindexing
                # introduces new labels
                if pd.lib.is_bool_array(ti.values):
                    r = f(fill_value=True)
                else:
                    r = f()

                terms[i].update(r)

        # pad the term up to ``naxes`` dimensions; a transposed Series is
        # promoted on its transpose and then transposed back
        res = _maybe_promote_shape(terms[i].value.T if transpose else
                                   terms[i].value, naxes)
        res = res.T if transpose else res

        # store the ``values`` attribute when the result has one, otherwise
        # the result itself
        try:
            v = res.values
        except AttributeError:
            v = res
        terms[i].update(v)

    return typ, _zip_axes_from_type(typ, axes)
148+
149+
150+
def _filter_terms(flat):
    """Split a flat collection of terms into names and literals.

    Parameters
    ----------
    flat : sequence
        The flattened terms. It is traversed twice, so it must be
        re-iterable (a list, not a generator).

    Returns
    -------
    names, literals : tuple of set
        ``literals`` holds every term for which ``is_const`` is true and
        ``names`` holds all remaining terms.
    """
    # literals are the terms accepted by ``is_const``
    literals = set(filter(is_const, flat))

    # everything else is a name; because this is a set difference the two
    # sets are disjoint by construction, so no overlap check is needed
    names = set(flat) - literals
    return names, literals
163+
164+
165+
def _align(terms, env):
    """Determine the result type and aligned axes for an expression.

    Parameters
    ----------
    terms : iterable
        The (possibly nested) terms of the expression; flattened with
        ``flatten`` before use.
    env : object
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    typ, axes : tuple
        For literal-only or scalar-only expressions, the numpy scalar type
        of the promoted dtype and ``None``. Otherwise whatever
        ``_align_core`` returns for the flattened terms.
    """
    # flatten the parse tree (a nested list)
    flat_terms = list(flatten(terms))

    # separate names and literals
    names, literals = _filter_terms(flat_terms)

    # only literals so just promote to a common type
    if not names:
        common = np.result_type(*literals)
        return common.type, None

    # if all resolved variables are numeric scalars
    if all(term.isscalar for term in flat_terms):
        scalar_values = (term.value for term in flat_terms)
        return np.result_type(*scalar_values).type, None

    # perform the main alignment
    typ, axes = _align_core(flat_terms)
    return typ, axes
182+
183+
184+
def _reconstruct_object(typ, obj, axes):
    """Rebuild a result of type ``typ`` from a raw value and optional axes.

    Parameters
    ----------
    typ : object
        A type, a numpy dtype (its ``type`` attribute is used) or a
        ``functools.partial`` constructor
    obj : object
        The value to pass to the constructor
    axes : dict
        Keyword arguments giving the axes of the resulting pandas object;
        only used when ``typ`` is a ``PandasObject`` subclass

    Returns
    -------
    reconst : typ
        ``typ(obj, **axes)`` for pandas types. Otherwise ``typ(obj)``,
        reduced to a Python scalar through ``item()`` when that succeeds.
    """
    # handle numpy dtypes, which carry their scalar type as ``.type``
    typ = getattr(typ, 'type', typ)

    is_pandas_type = (not isinstance(typ, partial) and
                      issubclass(typ, pd.core.generic.PandasObject))
    if is_pandas_type:
        return typ(obj, **axes)

    result = typ(obj)

    # item() is missing on plain Python objects and raises ValueError for
    # arrays holding more than one element; keep the value as is then
    try:
        scalar = result.item()
    except (AttributeError, ValueError):
        return result
    return scalar
219+

pandas/computation/api.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from pandas.computation.eval import eval
2+
from pandas.computation.expr import Expr

pandas/computation/common.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import collections
2+
from pandas.core.common import is_string
3+
4+
5+
def flatten(l):
    """Lazily yield the leaves of an arbitrarily nested iterable.

    Strings (as judged by ``is_string``) and non-iterable elements are
    yielded as they are; every other iterable element is flattened
    recursively.
    """
    for item in l:
        is_nested = (isinstance(item, collections.Iterable) and
                     not is_string(item))
        if not is_nested:
            yield item
            continue

        for leaf in flatten(item):
            yield leaf

pandas/computation/engines.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import abc
2+
3+
from pandas.computation.align import _align, _reconstruct_object
4+
5+
6+
class AbstractEngine(object):
    """Base class for engines that evaluate an expression.

    Subclasses implement ``convert`` and ``_evaluate``. ``evaluate`` aligns
    the expression's terms when that has not been done yet, runs
    ``_evaluate`` and rebuilds the result with the aligned type and axes.
    """
    __metaclass__ = abc.ABCMeta

    has_neg_frac = False

    def __init__(self, expr):
        self.expr = expr
        # both are filled in by ``evaluate``
        self.aligned_axes = self.result_type = None

    @abc.abstractmethod
    def convert(self):
        """Convert an expression for evaluation."""
        pass

    def evaluate(self):
        """Align if necessary, evaluate, and reconstruct the result."""
        expr = self.expr

        if not self._is_aligned:
            aligned = _align(expr.terms, expr.env)
            self.result_type, self.aligned_axes = aligned

        raw = self._evaluate(expr.env)
        return _reconstruct_object(self.result_type, raw, self.aligned_axes)

    @property
    def _is_aligned(self):
        """True once both the aligned axes and the result type are set."""
        return not (self.aligned_axes is None or self.result_type is None)

    @abc.abstractmethod
    def _evaluate(self, env):
        """Return an evaluated expression."""
        pass
38+
39+
40+
class NumExprEngine(AbstractEngine):
    """NumExpr engine class

    Evaluates the string form of the expression with ``numexpr.evaluate``.
    """
    has_neg_frac = True

    def __init__(self, expr):
        super(NumExprEngine, self).__init__(expr)

    def convert(self):
        """Return the expression formatted as a string."""
        return '%s' % self.expr

    def _evaluate(self, env):
        """Evaluate the converted expression with numexpr.

        Parameters
        ----------
        env : object
            Supplies the ``locals`` and ``globals`` dictionaries handed to
            ``numexpr.evaluate``.

        Raises
        ------
        NameError
            If the evaluation raises a ``KeyError``.
        """
        # imported here so that numexpr is only required when this engine
        # is actually used
        import numexpr as ne

        try:
            return ne.evaluate(self.convert(), local_dict=env.locals,
                               global_dict=env.globals,
                               truediv=self.expr.truediv)
        except KeyError as e:
            # ``e.message`` is deprecated (PEP 352) and gone in Python 3;
            # this reproduces its value: the sole argument, or '' otherwise
            missing = e.args[0] if len(e.args) == 1 else ''
            raise NameError('{0!r} is not defined'.format(missing))
60+
61+
62+
class PythonEngine(AbstractEngine):
    """Use NumPy even if numexpr is installed

    The expression object is called directly with its own environment, so
    the alignment done by the base class ``evaluate`` is skipped.
    """
    has_neg_frac = False

    def __init__(self, expr):
        super(PythonEngine, self).__init__(expr)

    def convert(self):
        """Nothing to convert for this engine."""
        return None

    def evaluate(self):
        """Call the expression with its own environment."""
        expression = self.expr
        return expression(expression.env)

    def _evaluate(self, env):
        """Unused; ``evaluate`` is overridden and never calls it."""
        return None
77+
78+
79+
# registry mapping an engine name to the class that implements it
_engines = {
    'numexpr': NumExprEngine,
    'python': PythonEngine,
}

0 commit comments

Comments
 (0)