diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 8429aebbd85b8..c01017f6fbd47 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -3,6 +3,7 @@ Parsing functions for datetime and datetime-like strings.
 """
 import re
 import time
+import warnings
 
 from libc.string cimport strchr
 
@@ -154,14 +155,24 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
         # date_string can't be converted to date, above format
         return None, None
 
+    swapped_day_and_month = False
     if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \
             and (month <= MAX_MONTH or day <= MAX_MONTH):
         if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap:
             day, month = month, day
+            swapped_day_and_month = True
+
+        # Warn once, before either constructor below runs, whenever the
+        # order actually used disagrees with the order ``dayfirst`` requested.
+        if dayfirst and not swapped_day_and_month:
+            warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.")
+        elif not dayfirst and swapped_day_and_month:
+            warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.")
+
         if PY_VERSION_HEX >= 0x03060100:
             # In Python <= 3.6.0 there is no range checking for invalid dates
             # in C api, thus we call faster C version for 3.6.1 or newer
             return datetime_new(year, month, day, 0, 0, 0, 0, None), reso
         return datetime(year, month, day, 0, 0, 0, 0, None), reso
 
     raise DateParseError(f"Invalid date specified ({month}/{day})")
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index d2049892705ea..471e83b11b035 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1752,6 +1752,70 @@ def test_dayfirst(self, cache):
         tm.assert_index_equal(expected, idx5)
         tm.assert_index_equal(expected, idx6)
 
+    def test_dayfirst_warnings(self):
+        # GH 12585
+        # A warning is expected whenever an element is parsed in the order
+        # opposite to the one requested through ``dayfirst``.
+        warning_msg_day_first = "Parsing '31/12/2014' in DD/MM/YYYY format."
+        warning_msg_month_first = "Parsing '03/30/2011' in MM/DD/YYYY format."
+
+        # CASE 1: valid input
+        arr = ["31/12/2014", "10/03/2011"]
+        expected = DatetimeIndex(
+            ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None
+        )
+
+        # A. dayfirst arg correct, no warning
+        res1 = to_datetime(arr, dayfirst=True)
+        tm.assert_index_equal(expected, res1)
+
+        # B. dayfirst arg incorrect, warning + incorrect output
+        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+            res2 = to_datetime(arr, dayfirst=False)
+        with pytest.raises(AssertionError):
+            tm.assert_index_equal(expected, res2)
+
+        # C. dayfirst default arg, same as B
+        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+            res3 = to_datetime(arr)
+        with pytest.raises(AssertionError):
+            tm.assert_index_equal(expected, res3)
+
+        # D. infer_datetime_format=True overrides dayfirst default
+        # no warning + correct result
+        res4 = to_datetime(arr, infer_datetime_format=True)
+        tm.assert_index_equal(expected, res4)
+
+        # CASE 2: invalid input
+        # cannot consistently process with single format
+        # warnings *always* raised
+
+        arr = ["31/12/2014", "03/30/2011"]
+        # first in DD/MM/YYYY, second in MM/DD/YYYY
+        expected = DatetimeIndex(
+            ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None
+        )
+
+        # A. use dayfirst=True
+        with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
+            res5 = to_datetime(arr, dayfirst=True)
+        tm.assert_index_equal(expected, res5)
+
+        # B. use dayfirst=False
+        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+            res6 = to_datetime(arr, dayfirst=False)
+        tm.assert_index_equal(expected, res6)
+
+        # C. use dayfirst default arg, same as B
+        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+            res7 = to_datetime(arr)
+        tm.assert_index_equal(expected, res7)
+
+        # D. use infer_datetime_format=True
+        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+            res8 = to_datetime(arr, infer_datetime_format=True)
+        tm.assert_index_equal(expected, res8)
+
     @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray])
     def test_to_datetime_dta_tz(self, klass):
         # GH#27733