Skip to content

Commit 950cbff

Browse files
mroeschke authored and ischurov committed
TST: Parse dates with empty space (pandas-dev#6428) (pandas-dev#14862)
+ Add doc explaining parse_date limitation
1 parent 4ce03c6 commit 950cbff

File tree

4 files changed

+35
-0
lines changed

4 files changed

+35
-0
lines changed

doc/source/io.rst

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -867,6 +867,12 @@ data columns:
867867
index_col=0) #index is the nominal column
868868
df
869869
870+
.. note::
871+
If a column or index contains an unparseable date, the entire column or
872+
index will be returned unaltered as an object data type. For non-standard
873+
datetime parsing, use :func:`to_datetime` after ``pd.read_csv``.
874+
875+
870876
.. note::
871877
read_csv has a fast_path for parsing datetime strings in iso8601 format,
872878
e.g. "2000-01-01T00:01:02+00:00" and similar variations. If you can arrange

pandas/io/parsers.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,10 @@
167167
* dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result
168168
'foo'
169169
170+
If a column or index contains an unparseable date, the entire column or
171+
index will be returned unaltered as an object data type. For non-standard
172+
datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``.
173+
170174
Note: A fast-path exists for iso8601-formatted dates.
171175
infer_datetime_format : boolean, default False
172176
If True and parse_dates is enabled, pandas will attempt to infer the format

pandas/io/tests/test_date_converters.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,19 @@ def date_parser(date, time):
138138
names=['datetime', 'prn']))
139139
assert_frame_equal(df, df_correct)
140140

141+
def test_parse_date_column_with_empty_string(self):
142+
# GH 6428
143+
data = """case,opdate
144+
7,10/18/2006
145+
7,10/18/2008
146+
621, """
147+
result = read_csv(StringIO(data), parse_dates=['opdate'])
148+
expected_data = [[7, '10/18/2006'],
149+
[7, '10/18/2008'],
150+
[621, ' ']]
151+
expected = DataFrame(expected_data, columns=['case', 'opdate'])
152+
assert_frame_equal(result, expected)
153+
141154
if __name__ == '__main__':
142155
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
143156
exit=False)

pandas/tseries/tests/test_timeseries.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -946,6 +946,18 @@ def test_to_datetime_on_datetime64_series(self):
946946
result = to_datetime(s)
947947
self.assertEqual(result[0], s[0])
948948

949+
def test_to_datetime_with_space_in_series(self):
950+
# GH 6428
951+
s = Series(['10/18/2006', '10/18/2008', ' '])
952+
tm.assertRaises(ValueError, lambda: to_datetime(s, errors='raise'))
953+
result_coerce = to_datetime(s, errors='coerce')
954+
expected_coerce = Series([datetime(2006, 10, 18),
955+
datetime(2008, 10, 18),
956+
pd.NaT])
957+
tm.assert_series_equal(result_coerce, expected_coerce)
958+
result_ignore = to_datetime(s, errors='ignore')
959+
tm.assert_series_equal(result_ignore, s)
960+
949961
def test_to_datetime_with_apply(self):
950962
# this is only locale tested with US/None locales
951963
_skip_if_has_locale()

0 commit comments

Comments
 (0)