pandas-dev · jreback · Jan 16, 2014 · Jan 16, 2014 · Jan 16, 2014
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -83,6 +83,7 @@ Improvements to existing features
   - pd.show_versions() is now available for convenience when reporting issues.
   - perf improvements to Series.str.extract (:issue:`5944`)
   - perf improvments in ``dtypes/ftypes`` methods (:issue:`5968`)
+  - perf improvements in indexing with object dtypes (:issue:`5968`)
 
 .. _release.bug_fixes-0.13.1:
 
@@ -116,6 +117,7 @@ Bug Fixes
   - Fixed ``to_datetime`` for array with both Tz-aware datetimes and ``NaT``s  (:issue:`5961`)
   - Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`)
   - Bug in scipy ``interpolate`` methods with a datetime index (:issue: `5975`)
+  - Bug in NaT comparison when mixed datetime/np.datetime64 values including NaT were passed (:issue:`5968`)
 
 pandas 0.13.0
 -------------

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1889,7 +1889,10 @@ def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None,
 
             if np.prod(values.shape):
                 flat = values.ravel()
-                inferred_type = lib.infer_dtype(flat)
+
+                # try with just the first element; we just need to see if
+                # this is a datetime or not
+                inferred_type = lib.infer_dtype(flat[0:1])
                 if inferred_type in ['datetime', 'datetime64']:
 
                     # we have an object array that has been inferred as

diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
@@ -4,9 +4,12 @@
 import os
 import warnings
 import nose
+import sys
+from distutils.version import LooseVersion
 
 import numpy as np
 
+import pandas as pd
 from pandas.core.frame import DataFrame, Series
 from pandas.io.parsers import read_csv
 from pandas.io.stata import read_stata, StataReader
@@ -66,6 +69,9 @@ def test_read_dta1(self):
         tm.assert_frame_equal(parsed_13, expected)
 
     def test_read_dta2(self):
+        if LooseVersion(sys.version) < '2.7':
+            raise nose.SkipTest('datetime interp under 2.6 is faulty')
+
         expected = DataFrame.from_records(
             [
                 (
@@ -89,14 +95,14 @@ def test_read_dta2(self):
                     datetime(2, 1, 1)
                 ),
                 (
-                    np.datetime64('NaT'),
-                    np.datetime64('NaT'),
-                    np.datetime64('NaT'),
-                    np.datetime64('NaT'),
-                    np.datetime64('NaT'),
-                    np.datetime64('NaT'),
-                    np.datetime64('NaT'),
-                    np.datetime64('NaT')
+                    pd.NaT,
+                    pd.NaT,
+                    pd.NaT,
+                    pd.NaT,
+                    pd.NaT,
+                    pd.NaT,
+                    pd.NaT,
+                    pd.NaT,
                 )
             ],
             columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',

diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd
@@ -67,7 +67,7 @@ cdef inline is_array(object o):
 
 cdef inline bint _checknull(object val):
     try:
-        return val is None or (cpython.PyFloat_Check(val) and val != val) 
+        return val is None or (cpython.PyFloat_Check(val) and val != val)
     except ValueError:
         return False
 

diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
@@ -829,6 +829,11 @@ def test_to_datetime_mixed(self):
         expected = Series([NaT,Timestamp('20130408'),Timestamp('20130409')])
         assert_series_equal(result,expected)
 
+        # mixed datetime/np.datetime64('NaT')
+        result = Series(to_datetime([dt.datetime(2000,1,1),np.datetime64('NaT')]))
+        expected = Series([dt.datetime(2000,1,1),NaT])
+        assert_series_equal(result, expected)
+
     def test_dayfirst(self):
 
         # GH 3341

diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
@@ -8,6 +8,7 @@ import numpy as np
 from cpython cimport (
     PyTypeObject,
     PyFloat_Check,
+    PyLong_Check,
     PyObject_RichCompareBool,
     PyObject_RichCompare,
     PyString_Check,
@@ -55,6 +56,9 @@ cdef int64_t NPY_NAT = util.get_nat()
 # < numpy 1.7 compat for NaT
 compat_NaT = np.array([NPY_NAT]).astype('m8[ns]').item()
 
+# the actual numpy NaT object (an np.datetime64 scalar)
+np_NaT = np.datetime64('NaT',dtype='M8')
+
 try:
     basestring
 except NameError: # py3
@@ -416,6 +420,11 @@ NaT = NaTType()
 iNaT = util.get_nat()
 
 
+cdef inline bint _checknull_with_nat(object val):
+    """ return True if val is None, a float NaN (val != val), or the module-level NaT object """
+    return val is None or (
+        PyFloat_Check(val) and val != val) or val is NaT
+
 cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1:
     return _nat_scalar_rules[op]
 
@@ -761,7 +770,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit):
 
     obj = _TSObject()
 
-    if ts is None or ts is NaT:
+    if ts is None or ts is NaT or ts is np_NaT:
         obj.value = NPY_NAT
     elif is_datetime64_object(ts):
         obj.value = _get_datetime64_nanos(ts)
@@ -933,7 +942,7 @@ def datetime_to_datetime64(ndarray[object] values):
     iresult = result.view('i8')
     for i in range(n):
         val = values[i]
-        if util._checknull(val) or val is NaT:
+        if _checknull_with_nat(val):
             iresult[i] = iNaT
         elif PyDateTime_Check(val):
             if val.tzinfo is not None:
@@ -999,7 +1008,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
         iresult = result.view('i8')
         for i in range(n):
             val = values[i]
-            if util._checknull(val) or val is NaT:
+            if _checknull_with_nat(val):
                 iresult[i] = iNaT
             elif PyDateTime_Check(val):
                 if val.tzinfo is not None:
@@ -1038,13 +1047,16 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
                         continue
                     raise
             elif util.is_datetime64_object(val):
-                try:
-                    iresult[i] = _get_datetime64_nanos(val)
-                except ValueError:
-                    if coerce:
-                        iresult[i] = iNaT
-                        continue
-                    raise
+                if val == np_NaT:
+                    iresult[i] = iNaT
+                else:
+                    try:
+                        iresult[i] = _get_datetime64_nanos(val)
+                    except ValueError:
+                        if coerce:
+                            iresult[i] = iNaT
+                            continue
+                        raise
 
             # if we are coercing, dont' allow integers
             elif util.is_integer_object(val) and not coerce:
@@ -1114,7 +1126,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
 
         for i in range(n):
             val = values[i]
-            if util._checknull(val):
+            if _checknull_with_nat(val):
                 oresult[i] = val
             elif util.is_string_object(val):
                 if len(val) == 0:
@@ -1166,7 +1178,7 @@ def array_to_timedelta64(ndarray[object] values, coerce=True):
 
              result[i] = val
 
-        elif util._checknull(val) or val == iNaT or val is NaT:
+        elif _checknull_with_nat(val):
              result[i] = iNaT
 
         else:
@@ -1316,7 +1328,7 @@ def array_strptime(ndarray[object] values, object fmt, coerce=False):
                 iresult[i] = iNaT
                 continue
         else:
-            if util._checknull(val) or val is NaT:
+            if _checknull_with_nat(val):
                 iresult[i] = iNaT
                 continue
             else:

diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py
@@ -167,3 +167,10 @@
 
 frame_loc_dups = Benchmark('df2.loc[idx]', setup,
                             start_date=datetime(2013, 1, 1))
+
+setup = common_setup + """
+df = DataFrame(dict( A = [ 'foo'] * 1000000))
+"""
+
+frame_iloc_big = Benchmark('df.iloc[:100,0]', setup,
+                            start_date=datetime(2013, 1, 1))