From 007a7619b1125c5a65eb76fce100f4020de7ea73 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 14:08:15 +0000 Subject: [PATCH 1/5] added warnings when parse inconsistent with dayfirst arg --- pandas/_libs/tslibs/parsing.pyx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index c4f369d0d3b3f..ac467fa3f36e5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -3,6 +3,7 @@ Parsing functions for datetime and datetime-like strings. """ import re import time +import warnings from libc.string cimport strchr @@ -149,14 +150,28 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): # date_string can't be converted to date, above format return None, None + swapped_day_and_month = False if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ and (month <= MAX_MONTH or day <= MAX_MONTH): if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: day, month = month, day + swapped_day_and_month = True if PY_VERSION_HEX >= 0x03060100: # In Python <= 3.6.0 there is no range checking for invalid dates # in C api, thus we call faster C version for 3.6.1 or newer + + if dayfirst and not swapped_day_and_month: + warnings.warn(f"Parsing {date_string} MM/DD format.") + elif not dayfirst and swapped_day_and_month: + warnings.warn(f"Parsing {date_string} DD/MM format.") + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + + if dayfirst and not swapped_day_and_month: + warnings.warn(f"Parsing {date_string} MM/DD format.") + elif not dayfirst and swapped_day_and_month: + warnings.warn(f"Parsing {date_string} DD/MM format.") + return datetime(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError(f"Invalid date specified ({month}/{day})") From 56175ea0d4f11f829e83ea4b216b8e931a162779 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 17:06:28 +0000 Subject: [PATCH 2/5] improved error message --- pandas/_libs/tslibs/parsing.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index ac467fa3f36e5..c2410fcc71d7a 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -161,16 +161,16 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): # in C api, thus we call faster C version for 3.6.1 or newer if dayfirst and not swapped_day_and_month: - warnings.warn(f"Parsing {date_string} MM/DD format.") + warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: - warnings.warn(f"Parsing {date_string} DD/MM format.") + warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") return datetime_new(year, month, day, 0, 0, 0, 0, None), reso if dayfirst and not swapped_day_and_month: - warnings.warn(f"Parsing {date_string} MM/DD format.") + warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: - warnings.warn(f"Parsing {date_string} DD/MM format.") + warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") return datetime(year, month, day, 0, 0, 0, 0, None), reso From 7ce97224b9a99c5d87a02a4c3f8b4d2e6d52fc33 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 17:08:40 +0000 Subject: [PATCH 3/5] TST: added tests --- pandas/tests/tools/test_to_datetime.py | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index d2049892705ea..70839035fe1c7 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1752,6 +1752,60 @@ def test_dayfirst(self, cache): tm.assert_index_equal(expected, idx5) tm.assert_index_equal(expected, idx6) + def test_dayfirst_warnings(self): + # GH 12585 + + # CASE 1: valid input + arr = ["31/12/2014", "10/03/2011"] + expected = DatetimeIndex( + ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None + ) + + # A. dayfirst arg correct, no warning + res1 = to_datetime(arr, dayfirst=True) + tm.assert_index_equal(expected, res1) + + # B. dayfirst arg incorrect, warning + incorrect output + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + res2 = to_datetime(arr, dayfirst=False) + with pytest.raises(AssertionError): + tm.assert_index_equal(expected, res2) + + # C. dayfirst default arg, same as B + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + res3 = to_datetime(arr, dayfirst=False) + with pytest.raises(AssertionError): + tm.assert_index_equal(expected, res3) + + # D. infer_datetime_format=True overrides dayfirst default + # no warning + correct result + res4 = to_datetime(arr, infer_datetime_format=True) + tm.assert_index_equal(expected, res4) + + # CASE 2: invalid input + # cannot consistently process with single format + # warnings *always* raised + + arr = ["31/12/2014", "03/30/2011"] + + msg = r"Parsing '03/30/2011' in MM/DD/YYYY format." + with pytest.warns(UserWarning, match=msg): + to_datetime(arr, dayfirst=True) + + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + to_datetime(arr, dayfirst=False) + + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + to_datetime(arr) + + msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." + with pytest.warns(UserWarning, match=msg): + to_datetime(arr, infer_datetime_format=True) + @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray]) def test_to_datetime_dta_tz(self, klass): # GH#27733 From 6424c41cc042ae6fb66145e02815196f826caf5e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 18:04:49 +0000 Subject: [PATCH 4/5] removed trailing whitespaces --- pandas/_libs/tslibs/parsing.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index c2410fcc71d7a..410837a4ed822 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -159,19 +159,19 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if PY_VERSION_HEX >= 0x03060100: # In Python <= 3.6.0 there is no range checking for invalid dates # in C api, thus we call faster C version for 3.6.1 or newer - + if dayfirst and not swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso - + if dayfirst and not swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.") elif not dayfirst and swapped_day_and_month: warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.") - + return datetime(year, month, day, 0, 0, 0, 0, None), reso raise DateParseError(f"Invalid date specified ({month}/{day})") From cfad81746954675b7ef934adc9cb31146298c454 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 27 Jul 2020 18:05:35 +0000 Subject: [PATCH 5/5] removed pytest.warns --- pandas/tests/tools/test_to_datetime.py | 36 +++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 70839035fe1c7..471e83b11b035 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1766,16 +1766,12 @@ def test_dayfirst_warnings(self): tm.assert_index_equal(expected, res1) # B. dayfirst arg incorrect, warning + incorrect output - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - res2 = to_datetime(arr, dayfirst=False) + res2 = to_datetime(arr, dayfirst=False) with pytest.raises(AssertionError): tm.assert_index_equal(expected, res2) # C. dayfirst default arg, same as B - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - res3 = to_datetime(arr, dayfirst=False) + res3 = to_datetime(arr, dayfirst=False) with pytest.raises(AssertionError): tm.assert_index_equal(expected, res3) @@ -1789,22 +1785,26 @@ def test_dayfirst_warnings(self): # warnings *always* raised arr = ["31/12/2014", "03/30/2011"] + # first in DD/MM/YYYY, second in MM/DD/YYYY + expected = DatetimeIndex( + ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None + ) - msg = r"Parsing '03/30/2011' in MM/DD/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr, dayfirst=True) + # A. use dayfirst=True + res5 = to_datetime(arr, dayfirst=True) + tm.assert_index_equal(expected, res5) - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr, dayfirst=False) + # B. use dayfirst=False + res6 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected, res6) - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr) + # C. use dayfirst default arg, same as B + res7 = to_datetime(arr, dayfirst=False) + tm.assert_index_equal(expected, res7) - msg = r"Parsing '31/12/2014' in DD/MM/YYYY format." - with pytest.warns(UserWarning, match=msg): - to_datetime(arr, infer_datetime_format=True) + # D. use infer_datetime_format=True + res8 = to_datetime(arr, infer_datetime_format=True) + tm.assert_index_equal(expected, res8) @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray]) def test_to_datetime_dta_tz(self, klass):