diff --git a/doc/source/release.rst b/doc/source/release.rst
index e49812b207921..a932eda55ff32 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -335,6 +335,9 @@ Experimental Features
 - A :meth:`~pandas.DataFrame.query` method has been added that allows you to
   select elements of a ``DataFrame`` using a natural query syntax nearly
   identical to Python syntax.
+- ``pd.eval`` and friends now evaluate operations involving ``datetime64``
+  objects in Python space because ``numexpr`` cannot handle ``NaT`` values
+  (:issue:`4897`).
 
 .. _release.bug_fixes-0.13.0:
 
diff --git a/pandas/computation/align.py b/pandas/computation/align.py
index 60975bdc8a5b4..f420d0dacf34c 100644
--- a/pandas/computation/align.py
+++ b/pandas/computation/align.py
@@ -111,14 +111,20 @@ def _align_core(terms):
     typ = biggest._constructor
     axes = biggest.axes
     naxes = len(axes)
+    gt_than_one_axis = naxes > 1
 
-    for term in (terms[i] for i in term_index):
-        for axis, items in enumerate(term.value.axes):
-            if isinstance(term.value, pd.Series) and naxes > 1:
-                ax, itm = naxes - 1, term.value.index
+    for value in (terms[i].value for i in term_index):
+        is_series = isinstance(value, pd.Series)
+        is_series_and_gt_one_axis = is_series and gt_than_one_axis
+
+        for axis, items in enumerate(value.axes):
+            if is_series_and_gt_one_axis:
+                ax, itm = naxes - 1, value.index
             else:
                 ax, itm = axis, items
-            axes[ax] = axes[ax].join(itm, how='outer')
+
+            if not axes[ax].is_(itm):
+                axes[ax] = axes[ax].join(itm, how='outer')
 
     for i, ndim in compat.iteritems(ndims):
         for axis, items in zip(range(ndim), axes):
@@ -136,7 +142,7 @@ def _align_core(terms):
                 warnings.warn("Alignment difference on axis {0} is larger"
                               " than an order of magnitude on term {1!r}, "
                               "by more than {2:.4g}; performance may suffer"
-                              "".format(axis, term.name, ordm),
+                              "".format(axis, terms[i].name, ordm),
                               category=pd.io.common.PerformanceWarning)
 
     if transpose:
diff --git a/pandas/computation/expr.py b/pandas/computation/expr.py
index ff9adc26b8201..ba2dffa9e71b8 100644
--- a/pandas/computation/expr.py
+++ b/pandas/computation/expr.py
@@ -493,8 +493,15 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
                                  maybe_eval_in_python=('==', '!=')):
         res = op(lhs, rhs)
 
-        # "in"/"not in" ops are always evaluated in python
+        if (res.op in _cmp_ops_syms and
+                lhs.is_datetime or rhs.is_datetime and
+                self.engine != 'pytables'):
+            # all date ops must be done in python bc numexpr doesn't work well
+            # with NaT
+            return self._possibly_eval(res, self.binary_ops)
+
         if res.op in eval_in_python:
+            # "in"/"not in" ops are always evaluated in python
             return self._possibly_eval(res, eval_in_python)
         elif (lhs.return_type == object or rhs.return_type == object and
               self.engine != 'pytables'):
diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py
index debc79e33968c..fd5ee159fe2b4 100644
--- a/pandas/computation/ops.py
+++ b/pandas/computation/ops.py
@@ -5,6 +5,7 @@
 import operator as op
 from functools import partial
 from itertools import product, islice, chain
+from datetime import datetime
 
 import numpy as np
 
@@ -161,24 +162,16 @@ def raw(self):
                                                  self.type))
 
     @property
-    def kind(self):
+    def is_datetime(self):
         try:
-            return self.type.__name__
+            t = self.type.type
         except AttributeError:
-            return self.type.type.__name__
+            t = self.type
+
+        return issubclass(t, (datetime, np.datetime64))
 
     @property
     def value(self):
-        kind = self.kind.lower()
-        if kind == 'datetime64':
-            try:
-                return self._value.asi8
-            except AttributeError:
-                return self._value.view('i8')
-        elif kind == 'datetime':
-            return pd.Timestamp(self._value)
-        elif kind == 'timestamp':
-            return self._value.asm8.view('i8')
         return self._value
 
     @value.setter
@@ -248,6 +241,15 @@ def return_type(self):
     def isscalar(self):
         return all(operand.isscalar for operand in self.operands)
 
+    @property
+    def is_datetime(self):
+        try:
+            t = self.return_type.type
+        except AttributeError:
+            t = self.return_type
+
+        return issubclass(t, (datetime, np.datetime64))
+
 
 def _in(x, y):
     """Compute the vectorized membership of ``x in y`` if possible, otherwise
@@ -424,24 +426,20 @@ def stringify(value):
 
         lhs, rhs = self.lhs, self.rhs
 
-        if (is_term(lhs) and lhs.kind.startswith('datetime') and is_term(rhs)
-            and rhs.isscalar):
+        if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.isscalar:
             v = rhs.value
             if isinstance(v, (int, float)):
                 v = stringify(v)
-            v = _ensure_decoded(v)
-            v = pd.Timestamp(v)
+            v = pd.Timestamp(_ensure_decoded(v))
             if v.tz is not None:
                 v = v.tz_convert('UTC')
             self.rhs.update(v)
 
-        if (is_term(rhs) and rhs.kind.startswith('datetime') and
-            is_term(lhs) and lhs.isscalar):
+        if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.isscalar:
             v = lhs.value
             if isinstance(v, (int, float)):
                 v = stringify(v)
-            v = _ensure_decoded(v)
-            v = pd.Timestamp(v)
+            v = pd.Timestamp(_ensure_decoded(v))
             if v.tz is not None:
                 v = v.tz_convert('UTC')
             self.lhs.update(v)
diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py
index 3554b8a3f81e1..e9201c233753f 100755
--- a/pandas/computation/tests/test_eval.py
+++ b/pandas/computation/tests/test_eval.py
@@ -1003,7 +1003,7 @@ def check_performance_warning_for_poor_alignment(self, engine, parser):
         expected = ("Alignment difference on axis {0} is larger"
                     " than an order of magnitude on term {1!r}, "
                     "by more than {2:.4g}; performance may suffer"
-                    "".format(1, 's', np.log10(s.size - df.shape[1])))
+                    "".format(1, 'df', np.log10(s.size - df.shape[1])))
         assert_equal(msg, expected)
 
     def test_performance_warning_for_poor_alignment(self):
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 7b9a75753136e..01e0d74ef8ce6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1894,29 +1894,6 @@ def _getitem_frame(self, key):
             raise ValueError('Must pass DataFrame with boolean values only')
         return self.where(key)
 
-    def _get_index_resolvers(self, axis):
-        # index or columns
-        axis_index = getattr(self, axis)
-        d = dict()
-
-        for i, name in enumerate(axis_index.names):
-            if name is not None:
-                key = level = name
-            else:
-                # prefix with 'i' or 'c' depending on the input axis
-                # e.g., you must do ilevel_0 for the 0th level of an unnamed
-                # multiiindex
-                level_string = '{prefix}level_{i}'.format(prefix=axis[0], i=i)
-                key = level_string
-                level = i
-
-            d[key] = Series(axis_index.get_level_values(level).values,
-                            index=axis_index, name=level)
-
-        # put the index/columns itself in the dict
-        d[axis] = axis_index
-        return d
-
     def query(self, expr, **kwargs):
         """Query the columns of a frame with a boolean expression.
 
@@ -2037,8 +2014,7 @@ def eval(self, expr, **kwargs):
         """
         resolvers = kwargs.pop('resolvers', None)
         if resolvers is None:
-            index_resolvers = self._get_index_resolvers('index')
-            index_resolvers.update(self._get_index_resolvers('columns'))
+            index_resolvers = self._get_resolvers()
             resolvers = [self, index_resolvers]
         kwargs['local_dict'] = _ensure_scope(resolvers=resolvers, **kwargs)
         return _eval(expr, **kwargs)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4553e4804e98b..705679136c3d2 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -272,6 +272,42 @@ def _get_block_manager_axis(self, axis):
             return m - axis
         return axis
 
+    def _get_axis_resolvers(self, axis):
+        # index or columns
+        axis_index = getattr(self, axis)
+        d = dict()
+        prefix = axis[0]
+
+        for i, name in enumerate(axis_index.names):
+            if name is not None:
+                key = level = name
+            else:
+                # prefix with 'i' or 'c' depending on the input axis
+                # e.g., you must do ilevel_0 for the 0th level of an unnamed
+                # multiiindex
+                key = '{prefix}level_{i}'.format(prefix=prefix, i=i)
+                level = i
+
+            level_values = axis_index.get_level_values(level)
+            s = level_values.to_series()
+            s.index = axis_index
+            d[key] = s
+
+        # put the index/columns itself in the dict
+        if isinstance(axis_index, MultiIndex):
+            dindex = axis_index
+        else:
+            dindex = axis_index.to_series()
+
+        d[axis] = dindex
+        return d
+
+    def _get_resolvers(self):
+        d = {}
+        for axis_name in self._AXIS_ORDERS:
+            d.update(self._get_axis_resolvers(axis_name))
+        return d
+
     @property
     def _info_axis(self):
         return getattr(self, self._info_axis_name)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index a6f806d5ce097..e5d2bb17ec7a8 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -11423,6 +11423,57 @@ def test_query_with_partially_named_multiindex(self):
         for parser, engine in product(['pandas'], ENGINES):
             yield self.check_query_with_partially_named_multiindex, parser, engine
 
+    def test_query_multiindex_get_index_resolvers(self):
+        for parser, engine in product(['pandas'], ENGINES):
+            yield self.check_query_multiindex_get_index_resolvers, parser, engine
+
+    def check_query_multiindex_get_index_resolvers(self, parser, engine):
+        df = mkdf(10, 3, r_idx_nlevels=2, r_idx_names=['spam', 'eggs'])
+        resolvers = df._get_resolvers()
+
+        def to_series(mi, level):
+            level_values = mi.get_level_values(level)
+            s = level_values.to_series()
+            s.index = mi
+            return s
+
+        col_series = df.columns.to_series()
+        expected = {'index': df.index,
+                    'columns': col_series,
+                    'spam': to_series(df.index, 'spam'),
+                    'eggs': to_series(df.index, 'eggs'),
+                    'C0': col_series}
+        for k, v in resolvers.items():
+            if isinstance(v, Index):
+                assert v.is_(expected[k])
+            elif isinstance(v, Series):
+                print(k)
+                tm.assert_series_equal(v, expected[k])
+            else:
+                raise AssertionError("object must be a Series or Index")
+
+    def test_raise_on_panel_with_multiindex(self):
+        for parser, engine in product(PARSERS, ENGINES):
+            yield self.check_raise_on_panel_with_multiindex, parser, engine
+
+    def check_raise_on_panel_with_multiindex(self, parser, engine):
+        skip_if_no_ne()
+        p = tm.makePanel(7)
+        p.items = tm.makeCustomIndex(len(p.items), nlevels=2)
+        with tm.assertRaises(NotImplementedError):
+            pd.eval('p + 1', parser=parser, engine=engine)
+
+    def test_raise_on_panel4d_with_multiindex(self):
+        for parser, engine in product(PARSERS, ENGINES):
+            yield self.check_raise_on_panel4d_with_multiindex, parser, engine
+
+    def check_raise_on_panel4d_with_multiindex(self, parser, engine):
+        skip_if_no_ne()
+        p4d = tm.makePanel4D(7)
+        p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2)
+        with tm.assertRaises(NotImplementedError):
+            pd.eval('p4d + 1', parser=parser, engine=engine)
+
 
 class TestDataFrameQueryNumExprPandas(unittest.TestCase):
 
     @classmethod
@@ -11446,6 +11497,71 @@ def test_date_query_method(self):
         expec = df[(df.dates1 < '20130101') & ('20130101' < df.dates3)]
         assert_frame_equal(res, expec)
 
+    def test_date_query_with_NaT(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        df = DataFrame(randn(n, 3))
+        df['dates1'] = date_range('1/1/2012', periods=n)
+        df['dates2'] = date_range('1/1/2013', periods=n)
+        df['dates3'] = date_range('1/1/2014', periods=n)
+        df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT
+        df.loc[np.random.rand(n) > 0.5, 'dates3'] = pd.NaT
+        res = df.query('dates1 < 20130101 < dates3', engine=engine,
+                       parser=parser)
+        expec = df[(df.dates1 < '20130101') & ('20130101' < df.dates3)]
+        assert_frame_equal(res, expec)
+
+    def test_date_index_query(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        df = DataFrame(randn(n, 3))
+        df['dates1'] = date_range('1/1/2012', periods=n)
+        df['dates3'] = date_range('1/1/2014', periods=n)
+        df.set_index('dates1', inplace=True, drop=True)
+        res = df.query('index < 20130101 < dates3', engine=engine,
+                       parser=parser)
+        expec = df[(df.index < '20130101') & ('20130101' < df.dates3)]
+        assert_frame_equal(res, expec)
+
+    def test_date_index_query_with_NaT(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        df = DataFrame(randn(n, 3))
+        df['dates1'] = date_range('1/1/2012', periods=n)
+        df['dates3'] = date_range('1/1/2014', periods=n)
+        df.iloc[0, 0] = pd.NaT
+        df.set_index('dates1', inplace=True, drop=True)
+        res = df.query('index < 20130101 < dates3', engine=engine,
+                       parser=parser)
+        expec = df[(df.index < '20130101') & ('20130101' < df.dates3)]
+        assert_frame_equal(res, expec)
+
+    def test_date_index_query_with_NaT_duplicates(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        d = {}
+        d['dates1'] = date_range('1/1/2012', periods=n)
+        d['dates3'] = date_range('1/1/2014', periods=n)
+        df = DataFrame(d)
+        df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT
+        df.set_index('dates1', inplace=True, drop=True)
+        res = df.query('index < 20130101 < dates3', engine=engine, parser=parser)
+        expec = df[(df.index.to_series() < '20130101') & ('20130101' < df.dates3)]
+        assert_frame_equal(res, expec)
+
+    def test_date_query_with_non_date(self):
+        engine, parser = self.engine, self.parser
+
+        n = 10
+        df = DataFrame({'dates': date_range('1/1/2012', periods=n),
+                        'nondate': np.arange(n)})
+
+        ops = '==', '!=', '<', '>', '<=', '>='
+
+        for op in ops:
+            with tm.assertRaises(TypeError):
+                df.query('dates %s nondate' % op, parser=parser, engine=engine)
+
     def test_query_scope(self):
         engine, parser = self.engine, self.parser
         from pandas.computation.common import NameResolutionError
@@ -11608,6 +11724,57 @@ def test_date_query_method(self):
         expec = df[(df.dates1 < '20130101') & ('20130101' < df.dates3)]
         assert_frame_equal(res, expec)
 
+    def test_date_query_with_NaT(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        df = DataFrame(randn(n, 3))
+        df['dates1'] = date_range('1/1/2012', periods=n)
+        df['dates2'] = date_range('1/1/2013', periods=n)
+        df['dates3'] = date_range('1/1/2014', periods=n)
+        df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT
+        df.loc[np.random.rand(n) > 0.5, 'dates3'] = pd.NaT
+        res = df.query('(dates1 < 20130101) & (20130101 < dates3)',
+                       engine=engine, parser=parser)
+        expec = df[(df.dates1 < '20130101') & ('20130101' < df.dates3)]
+        assert_frame_equal(res, expec)
+
+    def test_date_index_query(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        df = DataFrame(randn(n, 3))
+        df['dates1'] = date_range('1/1/2012', periods=n)
+        df['dates3'] = date_range('1/1/2014', periods=n)
+        df.set_index('dates1', inplace=True, drop=True)
+        res = df.query('(index < 20130101) & (20130101 < dates3)',
+                       engine=engine, parser=parser)
+        expec = df[(df.index < '20130101') & ('20130101' < df.dates3)]
+        assert_frame_equal(res, expec)
+
+    def test_date_index_query_with_NaT(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        df = DataFrame(randn(n, 3))
+        df['dates1'] = date_range('1/1/2012', periods=n)
+        df['dates3'] = date_range('1/1/2014', periods=n)
+        df.iloc[0, 0] = pd.NaT
+        df.set_index('dates1', inplace=True, drop=True)
+        res = df.query('(index < 20130101) & (20130101 < dates3)',
+                       engine=engine, parser=parser)
+        expec = df[(df.index < '20130101') & ('20130101' < df.dates3)]
+        assert_frame_equal(res, expec)
+
+    def test_date_index_query_with_NaT_duplicates(self):
+        engine, parser = self.engine, self.parser
+        n = 10
+        df = DataFrame(randn(n, 3))
+        df['dates1'] = date_range('1/1/2012', periods=n)
+        df['dates3'] = date_range('1/1/2014', periods=n)
+        df.loc[np.random.rand(n) > 0.5, 'dates1'] = pd.NaT
+        df.set_index('dates1', inplace=True, drop=True)
+        with tm.assertRaises(NotImplementedError):
+            res = df.query('index < 20130101 < dates3', engine=engine,
+                           parser=parser)
+
     def test_nested_scope(self):
         engine = self.engine
         parser = self.parser
diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py
index 8293f650425e3..fc84dd8bcdb81 100644
--- a/vb_suite/binary_ops.py
+++ b/vb_suite/binary_ops.py
@@ -106,7 +106,7 @@
 setup = common_setup + """
 N = 1000000
 halfway = N // 2 - 1
-s = Series(date_range('20010101', periods=N, freq='D'))
+s = Series(date_range('20010101', periods=N, freq='T'))
 ts = s[halfway]
 """
 
diff --git a/vb_suite/eval.py b/vb_suite/eval.py
index c666cd431cbb4..506d00b8bf9f9 100644
--- a/vb_suite/eval.py
+++ b/vb_suite/eval.py
@@ -47,12 +47,12 @@
 eval_frame_mult_all_threads = \
     Benchmark("pd.eval('df * df2 * df3 * df4')", common_setup,
               name='eval_frame_mult_all_threads',
-              start_date=datetime(2012, 7, 21))
+              start_date=datetime(2013, 7, 21))
 
 eval_frame_mult_one_thread = \
     Benchmark("pd.eval('df * df2 * df3 * df4')", setup,
               name='eval_frame_mult_one_thread',
-              start_date=datetime(2012, 7, 26))
+              start_date=datetime(2013, 7, 26))
 
 eval_frame_mult_python = \
     Benchmark("pdl.eval('df * df2 * df3 * df4', engine='python')",
@@ -62,7 +62,7 @@
 eval_frame_mult_python_one_thread = \
     Benchmark("pd.eval('df * df2 * df3 * df4', engine='python')", setup,
               name='eval_frame_mult_python_one_thread',
-              start_date=datetime(2012, 7, 26))
+              start_date=datetime(2013, 7, 26))
 
 #----------------------------------------------------------------------
 # multi and
@@ -71,12 +71,12 @@
     Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')",
               common_setup,
               name='eval_frame_and_all_threads',
-              start_date=datetime(2012, 7, 21))
+              start_date=datetime(2013, 7, 21))
 
 eval_frame_and_one_thread = \
     Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)')",
               setup, name='eval_frame_and_one_thread',
-              start_date=datetime(2012, 7, 26))
+              start_date=datetime(2013, 7, 26))
 
 setup = common_setup
 eval_frame_and_python = \
@@ -88,19 +88,19 @@
     Benchmark("pd.eval('(df > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)', engine='python')",
               setup,
               name='eval_frame_and_python_one_thread',
-              start_date=datetime(2012, 7, 26))
+              start_date=datetime(2013, 7, 26))
 
 #--------------------------------------------------------------------
 # chained comp
 eval_frame_chained_cmp_all_threads = \
     Benchmark("pd.eval('df < df2 < df3 < df4')", common_setup,
               name='eval_frame_chained_cmp_all_threads',
-              start_date=datetime(2012, 7, 21))
+              start_date=datetime(2013, 7, 21))
 
 eval_frame_chained_cmp_one_thread = \
     Benchmark("pd.eval('df < df2 < df3 < df4')", setup,
               name='eval_frame_chained_cmp_one_thread',
-              start_date=datetime(2012, 7, 26))
+              start_date=datetime(2013, 7, 26))
 
 setup = common_setup
 eval_frame_chained_cmp_python = \
@@ -111,4 +111,31 @@ eval_frame_chained_cmp_one_thread = \
     Benchmark("pd.eval('df < df2 < df3 < df4', engine='python')", setup,
               name='eval_frame_chained_cmp_python_one_thread',
-              start_date=datetime(2012, 7, 26))
+              start_date=datetime(2013, 7, 26))
+
+
+common_setup = """from pandas_vb_common import *
+"""
+
+setup = common_setup + """
+N = 1000000
+halfway = N // 2 - 1
+index = date_range('20010101', periods=N, freq='T')
+s = Series(index)
+ts = s.iloc[halfway]
+"""
+
+series_setup = setup + """
+df = DataFrame({'dates': s.values})
+"""
+
+query_datetime_series = Benchmark("df.query('dates < ts')",
+                                  series_setup,
+                                  start_date=datetime(2013, 9, 27))
+
+index_setup = setup + """
+df = DataFrame({'a': np.random.randn(N)}, index=index)
+"""
+
+query_datetime_index = Benchmark("df.query('index < ts')",
+                                 index_setup, start_date=datetime(2013, 9, 27))