Skip to content

Commit 510dd67

Browse files
mroeschke authored and jorisvandenbossche committed
TST: Parse dates with empty space (#6428) (#14862)
+ Add doc explaining parse_dates limitation
1 parent 86233e1 commit 510dd67

File tree

4 files changed

+35
-0
lines changed

4 files changed

+35
-0
lines changed

doc/source/io.rst

+6
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,12 @@ data columns:
867867
index_col=0) #index is the nominal column
868868
df
869869
870+
.. note::
871+
If a column or index contains an unparseable date, the entire column or
872+
index will be returned unaltered as an object data type. For non-standard
873+
datetime parsing, use :func:`to_datetime` after ``pd.read_csv``.
874+
875+
870876
.. note::
871877
read_csv has a fast path for parsing datetime strings in iso8601 format,
872878
e.g. "2000-01-01T00:01:02+00:00" and similar variations. If you can arrange

pandas/io/parsers.py

+4
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,10 @@
167167
* dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result
168168
'foo'
169169
170+
If a column or index contains an unparseable date, the entire column or
171+
index will be returned unaltered as an object data type. For non-standard
172+
datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``.
173+
170174
Note: A fast-path exists for iso8601-formatted dates.
171175
infer_datetime_format : boolean, default False
172176
If True and parse_dates is enabled, pandas will attempt to infer the format

pandas/io/tests/test_date_converters.py

+13
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,19 @@ def date_parser(date, time):
138138
names=['datetime', 'prn']))
139139
assert_frame_equal(df, df_correct)
140140

141+
def test_parse_date_column_with_empty_string(self):
142+
# GH 6428: a blank date cell is unparseable, so 'opdate' must stay as strings
143+
data = """case,opdate
144+
7,10/18/2006
145+
7,10/18/2008
146+
621, """
147+
result = read_csv(StringIO(data), parse_dates=['opdate'])
148+
expected_data = [[7, '10/18/2006'],
149+
[7, '10/18/2008'],
150+
[621, ' ']]
151+
expected = DataFrame(expected_data, columns=['case', 'opdate'])
152+
assert_frame_equal(result, expected)
153+
141154
if __name__ == '__main__':  # script entry: hand this module to nose's runner
142155
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
143156
exit=False)

pandas/tseries/tests/test_timeseries.py

+12
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,18 @@ def test_to_datetime_on_datetime64_series(self):
947947
result = to_datetime(s)
948948
self.assertEqual(result[0], s[0])
949949

950+
def test_to_datetime_with_space_in_series(self):
951+
# GH 6428: a whitespace-only entry is not a date; exercise each errors= mode
952+
s = Series(['10/18/2006', '10/18/2008', ' '])
953+
tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise'))
954+
result_coerce = to_datetime(s, errors='coerce')
955+
expected_coerce = Series([datetime(2006, 10, 18),
956+
datetime(2008, 10, 18),
957+
pd.NaT])
958+
tm.assert_series_equal(result_coerce, expected_coerce)
959+
result_ignore = to_datetime(s, errors='ignore')
960+
tm.assert_series_equal(result_ignore, s)
961+
950962
def test_to_datetime_with_apply(self):
951963
# this is only locale tested with US/None locales
952964
_skip_if_has_locale()

0 commit comments

Comments
 (0)