Skip to content

Commit 8d737e3

Browse files
committed
Merge pull request #7931 from jreback/dt_format
BUG: Bug in to_datetime when format='%Y%m%d' and coerce=True are specified (GH7930)
2 parents 5ba6254 + 9c34ed0 commit 8d737e3

File tree

3 files changed

+15
-4
lines changed

3 files changed

+15
-4
lines changed

doc/source/v0.15.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,8 @@ Bug Fixes
303303
- Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`)
304304
- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`)
305305
- Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a DatetimeIndex (:issue:`7777`)
306-
306+
- Bug in ``to_datetime`` when ``format='%Y%m%d'`` and ``coerce=True`` are specified, where previously an object array was returned (rather than
307+
a coerced time-series with ``NaT``) (:issue:`7930`)
307308
- Bug in ``DatetimeIndex`` and ``PeriodIndex`` in-place addition and subtraction cause different result from normal one (:issue:`6527`)
308309
- Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raise ``TypeError`` (:issue:`7741`)
309310
- Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`)

pandas/tseries/tests/test_timeseries.py

+10
Original file line numberDiff line numberDiff line change
@@ -3934,6 +3934,16 @@ def test_to_datetime_format_YYYYMMDD(self):
39343934
result = to_datetime(s,format='%Y%m%d')
39353935
assert_series_equal(result, expected)
39363936

3937+
# coercion
3938+
# GH 7930
3939+
s = Series([20121231, 20141231, 99991231])
3940+
result = pd.to_datetime(s,format='%Y%m%d')
3941+
expected = np.array([ datetime(2012,12,31), datetime(2014,12,31), datetime(9999,12,31) ], dtype=object)
3942+
self.assert_numpy_array_equal(result, expected)
3943+
3944+
result = pd.to_datetime(s,format='%Y%m%d', coerce=True)
3945+
expected = Series(['20121231','20141231','NaT'],dtype='M8[ns]')
3946+
assert_series_equal(result, expected)
39373947

39383948
def test_to_datetime_format_microsecond(self):
39393949
val = '01-Apr-2011 00:00:01.978'

pandas/tseries/tools.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ def _convert_listlike(arg, box, format):
262262
# shortcut formatting here
263263
if format == '%Y%m%d':
264264
try:
265-
result = _attempt_YYYYMMDD(arg)
265+
result = _attempt_YYYYMMDD(arg, coerce=coerce)
266266
except:
267267
raise ValueError("cannot convert the input to '%Y%m%d' date format")
268268

@@ -313,14 +313,14 @@ def _convert_listlike(arg, box, format):
313313
class DateParseError(ValueError):
314314
pass
315315

316-
def _attempt_YYYYMMDD(arg):
316+
def _attempt_YYYYMMDD(arg, coerce):
317317
""" try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
318318
arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) """
319319

320320
def calc(carg):
321321
# calculate the actual result
322322
carg = carg.astype(object)
323-
return lib.try_parse_year_month_day(carg/10000,carg/100 % 100, carg % 100)
323+
return tslib.array_to_datetime(lib.try_parse_year_month_day(carg/10000,carg/100 % 100, carg % 100), coerce=coerce)
324324

325325
def calc_with_mask(carg,mask):
326326
result = np.empty(carg.shape, dtype='M8[ns]')

0 commit comments

Comments
 (0)