Skip to content

Commit 855d3d7

Browse files
committed
Merge pull request pandas-dev#4924 from cpcloud/eval-datetime-in-python
ENH: evaluate datetime ops in python with eval
2 parents 03ac0bf + 1375c51 commit 855d3d7

File tree

10 files changed, with 284 additions and 64 deletions

doc/source/release.rst

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -335,6 +335,9 @@ Experimental Features
335335
- A :meth:`~pandas.DataFrame.query` method has been added that allows
336336
you to select elements of a ``DataFrame`` using a natural query syntax nearly
337337
identical to Python syntax.
338+
- ``pd.eval`` and friends now evaluate operations involving ``datetime64``
339+
objects in Python space because ``numexpr`` cannot handle ``NaT`` values
340+
(:issue:`4897`).
338341

339342
.. _release.bug_fixes-0.13.0:
340343

pandas/computation/align.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -111,14 +111,20 @@ def _align_core(terms):
111111
typ = biggest._constructor
112112
axes = biggest.axes
113113
naxes = len(axes)
114+
gt_than_one_axis = naxes > 1
114115

115-
for term in (terms[i] for i in term_index):
116-
for axis, items in enumerate(term.value.axes):
117-
if isinstance(term.value, pd.Series) and naxes > 1:
118-
ax, itm = naxes - 1, term.value.index
116+
for value in (terms[i].value for i in term_index):
117+
is_series = isinstance(value, pd.Series)
118+
is_series_and_gt_one_axis = is_series and gt_than_one_axis
119+
120+
for axis, items in enumerate(value.axes):
121+
if is_series_and_gt_one_axis:
122+
ax, itm = naxes - 1, value.index
119123
else:
120124
ax, itm = axis, items
121-
axes[ax] = axes[ax].join(itm, how='outer')
125+
126+
if not axes[ax].is_(itm):
127+
axes[ax] = axes[ax].join(itm, how='outer')
122128

123129
for i, ndim in compat.iteritems(ndims):
124130
for axis, items in zip(range(ndim), axes):
@@ -136,7 +142,7 @@ def _align_core(terms):
136142
warnings.warn("Alignment difference on axis {0} is larger"
137143
" than an order of magnitude on term {1!r}, "
138144
"by more than {2:.4g}; performance may suffer"
139-
"".format(axis, term.name, ordm),
145+
"".format(axis, terms[i].name, ordm),
140146
category=pd.io.common.PerformanceWarning)
141147

142148
if transpose:

pandas/computation/expr.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -493,8 +493,15 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
493493
maybe_eval_in_python=('==', '!=')):
494494
res = op(lhs, rhs)
495495

496-
# "in"/"not in" ops are always evaluated in python
496+
if (res.op in _cmp_ops_syms and
497+
lhs.is_datetime or rhs.is_datetime and
498+
self.engine != 'pytables'):
499+
# all date ops must be done in python bc numexpr doesn't work well
500+
# with NaT
501+
return self._possibly_eval(res, self.binary_ops)
502+
497503
if res.op in eval_in_python:
504+
# "in"/"not in" ops are always evaluated in python
498505
return self._possibly_eval(res, eval_in_python)
499506
elif (lhs.return_type == object or rhs.return_type == object and
500507
self.engine != 'pytables'):

pandas/computation/ops.py

Lines changed: 19 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import operator as op
66
from functools import partial
77
from itertools import product, islice, chain
8+
from datetime import datetime
89

910
import numpy as np
1011

@@ -161,24 +162,16 @@ def raw(self):
161162
self.type))
162163

163164
@property
164-
def kind(self):
165+
def is_datetime(self):
165166
try:
166-
return self.type.__name__
167+
t = self.type.type
167168
except AttributeError:
168-
return self.type.type.__name__
169+
t = self.type
170+
171+
return issubclass(t, (datetime, np.datetime64))
169172

170173
@property
171174
def value(self):
172-
kind = self.kind.lower()
173-
if kind == 'datetime64':
174-
try:
175-
return self._value.asi8
176-
except AttributeError:
177-
return self._value.view('i8')
178-
elif kind == 'datetime':
179-
return pd.Timestamp(self._value)
180-
elif kind == 'timestamp':
181-
return self._value.asm8.view('i8')
182175
return self._value
183176

184177
@value.setter
@@ -248,6 +241,15 @@ def return_type(self):
248241
def isscalar(self):
249242
return all(operand.isscalar for operand in self.operands)
250243

244+
@property
245+
def is_datetime(self):
246+
try:
247+
t = self.return_type.type
248+
except AttributeError:
249+
t = self.return_type
250+
251+
return issubclass(t, (datetime, np.datetime64))
252+
251253

252254
def _in(x, y):
253255
"""Compute the vectorized membership of ``x in y`` if possible, otherwise
@@ -424,24 +426,20 @@ def stringify(value):
424426

425427
lhs, rhs = self.lhs, self.rhs
426428

427-
if (is_term(lhs) and lhs.kind.startswith('datetime') and is_term(rhs)
428-
and rhs.isscalar):
429+
if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.isscalar:
429430
v = rhs.value
430431
if isinstance(v, (int, float)):
431432
v = stringify(v)
432-
v = _ensure_decoded(v)
433-
v = pd.Timestamp(v)
433+
v = pd.Timestamp(_ensure_decoded(v))
434434
if v.tz is not None:
435435
v = v.tz_convert('UTC')
436436
self.rhs.update(v)
437437

438-
if (is_term(rhs) and rhs.kind.startswith('datetime') and
439-
is_term(lhs) and lhs.isscalar):
438+
if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.isscalar:
440439
v = lhs.value
441440
if isinstance(v, (int, float)):
442441
v = stringify(v)
443-
v = _ensure_decoded(v)
444-
v = pd.Timestamp(v)
442+
v = pd.Timestamp(_ensure_decoded(v))
445443
if v.tz is not None:
446444
v = v.tz_convert('UTC')
447445
self.lhs.update(v)

pandas/computation/tests/test_eval.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1003,7 +1003,7 @@ def check_performance_warning_for_poor_alignment(self, engine, parser):
10031003
expected = ("Alignment difference on axis {0} is larger"
10041004
" than an order of magnitude on term {1!r}, "
10051005
"by more than {2:.4g}; performance may suffer"
1006-
"".format(1, 's', np.log10(s.size - df.shape[1])))
1006+
"".format(1, 'df', np.log10(s.size - df.shape[1])))
10071007
assert_equal(msg, expected)
10081008

10091009
def test_performance_warning_for_poor_alignment(self):

pandas/core/frame.py

Lines changed: 1 addition & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1894,29 +1894,6 @@ def _getitem_frame(self, key):
18941894
raise ValueError('Must pass DataFrame with boolean values only')
18951895
return self.where(key)
18961896

1897-
def _get_index_resolvers(self, axis):
1898-
# index or columns
1899-
axis_index = getattr(self, axis)
1900-
d = dict()
1901-
1902-
for i, name in enumerate(axis_index.names):
1903-
if name is not None:
1904-
key = level = name
1905-
else:
1906-
# prefix with 'i' or 'c' depending on the input axis
1907-
# e.g., you must do ilevel_0 for the 0th level of an unnamed
1908-
# MultiIndex
1909-
level_string = '{prefix}level_{i}'.format(prefix=axis[0], i=i)
1910-
key = level_string
1911-
level = i
1912-
1913-
d[key] = Series(axis_index.get_level_values(level).values,
1914-
index=axis_index, name=level)
1915-
1916-
# put the index/columns itself in the dict
1917-
d[axis] = axis_index
1918-
return d
1919-
19201897
def query(self, expr, **kwargs):
19211898
"""Query the columns of a frame with a boolean expression.
19221899
@@ -2037,8 +2014,7 @@ def eval(self, expr, **kwargs):
20372014
"""
20382015
resolvers = kwargs.pop('resolvers', None)
20392016
if resolvers is None:
2040-
index_resolvers = self._get_index_resolvers('index')
2041-
index_resolvers.update(self._get_index_resolvers('columns'))
2017+
index_resolvers = self._get_resolvers()
20422018
resolvers = [self, index_resolvers]
20432019
kwargs['local_dict'] = _ensure_scope(resolvers=resolvers, **kwargs)
20442020
return _eval(expr, **kwargs)

pandas/core/generic.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -272,6 +272,42 @@ def _get_block_manager_axis(self, axis):
272272
return m - axis
273273
return axis
274274

275+
def _get_axis_resolvers(self, axis):
276+
# index or columns
277+
axis_index = getattr(self, axis)
278+
d = dict()
279+
prefix = axis[0]
280+
281+
for i, name in enumerate(axis_index.names):
282+
if name is not None:
283+
key = level = name
284+
else:
285+
# prefix with 'i' or 'c' depending on the input axis
286+
# e.g., you must do ilevel_0 for the 0th level of an unnamed
287+
# MultiIndex
288+
key = '{prefix}level_{i}'.format(prefix=prefix, i=i)
289+
level = i
290+
291+
level_values = axis_index.get_level_values(level)
292+
s = level_values.to_series()
293+
s.index = axis_index
294+
d[key] = s
295+
296+
# put the index/columns itself in the dict
297+
if isinstance(axis_index, MultiIndex):
298+
dindex = axis_index
299+
else:
300+
dindex = axis_index.to_series()
301+
302+
d[axis] = dindex
303+
return d
304+
305+
def _get_resolvers(self):
306+
d = {}
307+
for axis_name in self._AXIS_ORDERS:
308+
d.update(self._get_axis_resolvers(axis_name))
309+
return d
310+
275311
@property
276312
def _info_axis(self):
277313
return getattr(self, self._info_axis_name)

0 commit comments

Comments
 (0)