pandas-dev · jreback · May 16, 2013 · May 16, 2013
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -122,6 +122,8 @@ pandas 0.11.1
   - Fix ``read_csv`` to correctly encode identical na_values, e.g. ``na_values=[-999.0,-999]``
     was failing (GH3611_)
   - Fix indexing issue in ndim >= 3 with ``iloc`` (GH3617_)
+  - Correctly parse date columns with embedded missing values (nan/NaT) into datetime64[ns]
+    dtype in ``read_csv`` when ``parse_dates`` is specified (GH3062_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -172,6 +174,7 @@ pandas 0.11.1
 .. _GH3617: https://github.com/pydata/pandas/issues/3617
 .. _GH3435: https://github.com/pydata/pandas/issues/3435
 .. _GH3611: https://github.com/pydata/pandas/issues/3611
+.. _GH3062: https://github.com/pydata/pandas/issues/3062
 .. _GH1512: https://github.com/pydata/pandas/issues/1512
 
 

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -531,6 +531,28 @@ def test_custom_na_values(self):
                               skiprows=[1])
         assert_almost_equal(df3.values, expected)
 
+    def test_nat_parse(self):
+
+        # GH 3062
+        df = DataFrame(dict({
+                    'A' : np.asarray(range(10),dtype='float64'), 
+                    'B' : pd.Timestamp('20010101') }))
+        df.iloc[3:6,:] = np.nan
+
+        with ensure_clean('__nat_parse_.csv') as path:
+            df.to_csv(path)
+            result = read_csv(path,index_col=0,parse_dates=['B'])
+            tm.assert_frame_equal(result,df)
+
+            expected = Series(dict( A = 'float64',B = 'datetime64[ns]'))
+            tm.assert_series_equal(expected,result.dtypes)
+
+            # test with NaT for the na_rep
+            # we don't have a method to specify the Datetime na_rep (it defaults to '')
+            df.to_csv(path)
+            result = read_csv(path,index_col=0,parse_dates=['B'])
+            tm.assert_frame_equal(result,df)
+
     def test_skiprows_bug(self):
         # GH #505
         text = """#foo,a,b,c

diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
@@ -318,8 +318,10 @@ class Timestamp(_Timestamp):
                         ts.dts.us, ts.tzinfo)
 
 
+_nat_strings = set(['NaT','nat','NAT','nan','NaN','NAN'])
 class NaTType(_NaT):
     """(N)ot-(A)-(T)ime, the time equivalent of NaN"""
+
     def __new__(cls):
         cdef _NaT base
 
@@ -647,8 +649,11 @@ cdef convert_to_tsobject(object ts, object tz):
         obj.value = ts
         pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
     elif util.is_string_object(ts):
-        _string_to_dts(ts, &obj.dts)
-        obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
+        if ts in _nat_strings:
+            obj.value = NPY_NAT
+        else:
+            _string_to_dts(ts, &obj.dts)
+            obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
     elif PyDateTime_Check(ts):
         if tz is not None:
             # sort of a temporary hack
@@ -862,6 +867,10 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
                        iresult[i] = iNaT
                        continue
 
+                    elif val in _nat_strings:
+                       iresult[i] = iNaT
+                       continue
+
                     _string_to_dts(val, &dts)
                     iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns,
                                                                    &dts)