Skip to content

Fix and tests for issue #10154 inconsistent behavior with invalid dates #10520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
19 changes: 18 additions & 1 deletion pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from pandas import (Index, Series, TimeSeries, DataFrame,
isnull, date_range, Timestamp, Period, DatetimeIndex,
Int64Index, to_datetime, bdate_range, Float64Index, TimedeltaIndex)
Int64Index, to_datetime, bdate_range, Float64Index, TimedeltaIndex, NaT)

import pandas.core.datetools as datetools
import pandas.tseries.offsets as offsets
Expand Down Expand Up @@ -4461,6 +4461,23 @@ def test_second(self):
self.assertIsInstance(r2, Float64Index)
tm.assert_index_equal(r1, r2)

class TestDaysInMonth(tm.TestCase):
def test_day_not_in_month_coerce_true_NaT(self):
self.assertTrue(isnull(to_datetime('2015-02-29', coerce=True)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number as a comment

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls add a blank line in between functions

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added comments and blank lines. Should I open new issues for these test cases that do not pass?

self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True)))
self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True)))

self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True)))
self.assertTrue(isnull(to_datetime('2015-02-32', format="%Y-%m-%d", coerce=True)))
self.assertTrue(isnull(to_datetime('2015-04-31', format="%Y-%m-%d", coerce=True)))
# GH 10154: a day that does not exist in the given month must raise
# ValueError when errors='raise' and coerce=False, both when the format is
# left unspecified and when an explicit "%Y-%m-%d" format is supplied.
def test_day_not_in_month_coerce_false_raise(self):
self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', coerce=False)
self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='raise', format="%Y-%m-%d", coerce=False)
self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='raise', format="%Y-%m-%d", coerce=False)
self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='raise', format="%Y-%m-%d", coerce=False)
# GH 10154: with errors='ignore' and coerce=False, an invalid date given
# without a format is expected to come back unchanged as the input string,
# while the same kind of input with an explicit "%Y-%m-%d" format is still
# expected to raise ValueError.
def test_day_not_in_month_coerce_false_ignore(self):
self.assertEqual(to_datetime('2015-02-29', errors='ignore', coerce=False), '2015-02-29')
self.assertRaises(ValueError, to_datetime, '2015-02-29', errors='ignore', format="%Y-%m-%d", coerce=False)
self.assertRaises(ValueError, to_datetime, '2015-02-32', errors='ignore', format="%Y-%m-%d", coerce=False)
self.assertRaises(ValueError, to_datetime, '2015-04-31', errors='ignore', format="%Y-%m-%d", coerce=False)

# Run this module's tests through nose when the file is executed directly.
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
9 changes: 7 additions & 2 deletions pandas/tseries/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas.tslib as tslib
import pandas.core.common as com
from pandas.compat import StringIO, callable
from pandas.tslib import NaT, iNaT
import pandas.compat as compat

try:
Expand Down Expand Up @@ -320,7 +321,7 @@ def _convert_listlike(arg, box, format):
except ValueError:
# Only raise this error if the user provided the
# datetime format, and not when it was inferred
if not infer_datetime_format:
if not infer_datetime_format and not coerce:
raise

if result is None and (format is None or infer_datetime_format):
Expand Down Expand Up @@ -349,7 +350,11 @@ def _convert_listlike(arg, box, format):
elif com.is_list_like(arg):
return _convert_listlike(arg, box, format)

return _convert_listlike(np.array([ arg ]), box, format)[0]
try:
return _convert_listlike(np.array([ arg ]), box, format)[0]
except ValueError as e:
if not coerce:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what case does this catch here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test would not pass
self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True)))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be caught in the cython code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the try/except this is the error.
It seems like _convert_listlike() would have to accept coerce as an argument to fix it?

Traceback (most recent call last):
  File "/Users/vmd/Github/pandas_to_datetime_fix/pandas/tseries/tests/test_timeseries.py", line 4467, in test_day_not_in_month_coerce_true_NaT
    self.assertTrue(isnull(to_datetime('2015-02-29', format="%Y-%m-%d", coerce=True)))
  File "/Users/vmd/Github/pandas_to_datetime_fix/pandas/tseries/tools.py", line 354, in to_datetime
    return _convert_listlike(np.array([ arg ]), box, format)[0]
  File "/Users/vmd/Github/pandas_to_datetime_fix/pandas/tseries/tools.py", line 341, in _convert_listlike
    raise e
  File "/Users/vmd/Github/pandas_to_datetime_fix/pandas/tseries/tools.py", line 332, in _convert_listlike
    if com.is_datetime64_dtype(result) and box:
  File "/Users/vmd/Github/pandas_to_datetime_fix/pandas/core/common.py", line 2516, in is_datetime64_dtype
    tipo = _get_dtype_type(arr_or_dtype)
  File "/Users/vmd/Github/pandas_to_datetime_fix/pandas/core/common.py", line 2479, in _get_dtype_type
    raise ValueError('%r is not a dtype' % arr_or_dtype)
ValueError: None is not a dtype

----------------------------------------------------------------------
Ran 3 tests in 0.019s

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

coerce is already passed to the cython functions

my point is that this should be caught and coerced at a lower level

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are catching an incorrect exception (something else is wrong) - it's not an expected exception

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reversed this, the tests will not pass now. I don't know how to fix it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well I'll have a look tomorrow

raise e

# ValueError subclass, so handlers written as `except ValueError` also catch it.
# NOTE(review): presumably raised when a date string cannot be parsed --
# confirm against the raise sites, which are outside this view.
class DateParseError(ValueError):
pass
Expand Down
28 changes: 17 additions & 11 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2760,17 +2760,23 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, bint coe
# Cannot pre-calculate datetime_date() since can change in Julian
# calculation and thus could have different value for the day of the wk
# calculation.
if julian == -1:
# Need to add 1 to result since first day of the year is 1, not 0.
julian = datetime_date(year, month, day).toordinal() - \
datetime_date(year, 1, 1).toordinal() + 1
else: # Assume that if they bothered to include Julian day it will
# be accurate.
datetime_result = datetime_date.fromordinal(
(julian - 1) + datetime_date(year, 1, 1).toordinal())
year = datetime_result.year
month = datetime_result.month
day = datetime_result.day
try:
if julian == -1:
# Need to add 1 to result since first day of the year is 1, not 0.
julian = datetime_date(year, month, day).toordinal() - \
datetime_date(year, 1, 1).toordinal() + 1
else: # Assume that if they bothered to include Julian day it will
# be accurate.
datetime_result = datetime_date.fromordinal(
(julian - 1) + datetime_date(year, 1, 1).toordinal())
year = datetime_result.year
month = datetime_result.month
day = datetime_result.day
except ValueError:
if coerce:
iresult[i] = iNaT
continue
raise
if weekday == -1:
weekday = datetime_date(year, month, day).weekday()

Expand Down