TST: Parse dates with empty space (pandas-dev#6428) (pandas-dev#14862)

mroeschke · ischurov · commit 950cbff22721 · 2016-12-19T04:04:58.000+03:00
+ Add doc explaining parse_dates limitation
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -867,6 +867,12 @@ data columns:
                     index_col=0) #index is the nominal column
    df
 
+.. note::
+   If a column or index contains an unparseable date, the entire column or
+   index will be returned unaltered as an object data type. For non-standard
+   datetime parsing, use :func:`to_datetime` after ``pd.read_csv``.
+
+
 .. note::
    read_csv has a fast_path for parsing datetime strings in iso8601 format,
    e.g "2000-01-01T00:01:02+00:00" and similar variations. If you can arrange
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -167,6 +167,10 @@
     * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result
       'foo'
 
+    If a column or index contains an unparseable date, the entire column or
+    index will be returned unaltered as an object data type. For non-standard
+    datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``.
+
     Note: A fast-path exists for iso8601-formatted dates.
 infer_datetime_format : boolean, default False
     If True and parse_dates is enabled, pandas will attempt to infer the format
diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py
@@ -138,6 +138,19 @@ def date_parser(date, time):
                                names=['datetime', 'prn']))
         assert_frame_equal(df, df_correct)
 
+    def test_parse_date_column_with_empty_string(self):
+        # GH 6428: a blank date makes the column unparseable; kept as object
+        data = """case,opdate
+                  7,10/18/2006
+                  7,10/18/2008
+                  621, """
+        result = read_csv(StringIO(data), parse_dates=['opdate'])
+        expected_data = [[7, '10/18/2006'],
+                         [7, '10/18/2008'],
+                         [621, ' ']]
+        expected = DataFrame(expected_data, columns=['case', 'opdate'])
+        assert_frame_equal(result, expected)
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
@@ -946,6 +946,18 @@ def test_to_datetime_on_datetime64_series(self):
         result = to_datetime(s)
         self.assertEqual(result[0], s[0])
 
+    def test_to_datetime_with_space_in_series(self):
+        # GH 6428: ' ' raises, becomes NaT, or is kept as-is, per ``errors``
+        s = Series(['10/18/2006', '10/18/2008', ' '])
+        tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise'))
+        result_coerce = to_datetime(s, errors='coerce')
+        expected_coerce = Series([datetime(2006, 10, 18),
+                                  datetime(2008, 10, 18),
+                                  pd.NaT])
+        tm.assert_series_equal(result_coerce, expected_coerce)
+        result_ignore = to_datetime(s, errors='ignore')
+        tm.assert_series_equal(result_ignore, s)
+
     def test_to_datetime_with_apply(self):
         # this is only locale tested with US/None locales
         _skip_if_has_locale()