Skip to content

Commit 5a76b44

Browse files
vincentdavis authored and jreback committed
BUG: inconsistent behavior with invalid dates in to_datetime, #10154
1 parent fbe8c0b commit 5a76b44

File tree

3 files changed

+41
-13
lines changed

3 files changed

+41
-13
lines changed

doc/source/whatsnew/v0.17.0.txt

+1 -1
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ Bug Fixes
103103
~~~~~~~~~
104104

105105
- Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
106-
106+
- Bug in ``to_datetime`` with invalid dates and formats supplied (:issue:`10154`)
107107

108108
- Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`)
109109

pandas/tseries/tests/test_timeseries.py

+23 -1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
from pandas import (Index, Series, TimeSeries, DataFrame,
1313
isnull, date_range, Timestamp, Period, DatetimeIndex,
14-
Int64Index, to_datetime, bdate_range, Float64Index, TimedeltaIndex)
14+
Int64Index, to_datetime, bdate_range, Float64Index, TimedeltaIndex, NaT)
1515

1616
import pandas.core.datetools as datetools
1717
import pandas.tseries.offsets as offsets
@@ -4461,6 +4461,28 @@ def test_second(self):
44614461
self.assertIsInstance(r2, Float64Index)
44624462
tm.assert_index_equal(r1, r2)
44634463

4464+
class TestDaysInMonth(tm.TestCase):
4465+
4466+
# tests for issue #10154
4467+
4468+
def test_day_not_in_month_coerce_true_NaT(self):
4469+
self.assertTrue(isnull(to_datetime('2015-02-29', coerce=True)))
4470+
self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True)))
4471+
self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", coerce=True)))
4472+
self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True)))
4473+
4474+
def test_day_not_in_month_coerce_false_raise(self):
4475+
self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', coerce=False)
4476+
self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', format="%Y-%m-%d", coerce=False)
4477+
self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d", coerce=False)
4478+
self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d", coerce=False)
4479+
4480+
def test_day_not_in_month_coerce_false_ignore(self):
4481+
self.assertEqual(to_datetime('2015-02-29', errors='ignore', coerce=False), '2015-02-29')
4482+
self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='ignore', format="%Y-%m-%d", coerce=False)
4483+
self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='ignore', format="%Y-%m-%d", coerce=False)
4484+
self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='ignore', format="%Y-%m-%d", coerce=False)
4485+
44644486
if __name__ == '__main__':
44654487
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
44664488
exit=False)

pandas/tslib.pyx

+17 -11
Original file line number | Diff line number | Diff line change
@@ -2760,17 +2760,23 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe
27602760
# Cannot pre-calculate datetime_date() since can change in Julian
27612761
# calculation and thus could have different value for the day of the wk
27622762
# calculation.
2763-
if julian == -1:
2764-
# Need to add 1 to result since first day of the year is 1, not 0.
2765-
julian = datetime_date(year, month, day).toordinal() - \
2766-
datetime_date(year, 1, 1).toordinal() + 1
2767-
else: # Assume that if they bothered to include Julian day it will
2768-
# be accurate.
2769-
datetime_result = datetime_date.fromordinal(
2770-
(julian - 1) + datetime_date(year, 1, 1).toordinal())
2771-
year = datetime_result.year
2772-
month = datetime_result.month
2773-
day = datetime_result.day
2763+
try:
2764+
if julian == -1:
2765+
# Need to add 1 to result since first day of the year is 1, not 0.
2766+
julian = datetime_date(year, month, day).toordinal() - \
2767+
datetime_date(year, 1, 1).toordinal() + 1
2768+
else: # Assume that if they bothered to include Julian day it will
2769+
# be accurate.
2770+
datetime_result = datetime_date.fromordinal(
2771+
(julian - 1) + datetime_date(year, 1, 1).toordinal())
2772+
year = datetime_result.year
2773+
month = datetime_result.month
2774+
day = datetime_result.day
2775+
except ValueError:
2776+
if coerce:
2777+
iresult[i] = iNaT
2778+
continue
2779+
raise
27742780
if weekday == -1:
27752781
weekday = datetime_date(year, month, day).weekday()
27762782

0 commit comments

Comments
 (0)