Skip to content

WIP BUG: Inconsistent date parsing of to_datetime #35428

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Parsing functions for datetime and datetime-like strings.
"""
import re
import time
import warnings

from libc.string cimport strchr

Expand Down Expand Up @@ -149,14 +150,28 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
# date_string can't be converted to date, above format
return None, None

swapped_day_and_month = False
if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \
and (month <= MAX_MONTH or day <= MAX_MONTH):
if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap:
day, month = month, day
swapped_day_and_month = True
if PY_VERSION_HEX >= 0x03060100:
# In Python <= 3.6.0 there is no range checking for invalid dates
# in C api, thus we call faster C version for 3.6.1 or newer

if dayfirst and not swapped_day_and_month:
warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.")
elif not dayfirst and swapped_day_and_month:
warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.")

return datetime_new(year, month, day, 0, 0, 0, 0, None), reso

if dayfirst and not swapped_day_and_month:
warnings.warn(f"Parsing '{date_string}' in MM/DD/YYYY format.")
elif not dayfirst and swapped_day_and_month:
warnings.warn(f"Parsing '{date_string}' in DD/MM/YYYY format.")

return datetime(year, month, day, 0, 0, 0, 0, None), reso

raise DateParseError(f"Invalid date specified ({month}/{day})")
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1752,6 +1752,60 @@ def test_dayfirst(self, cache):
tm.assert_index_equal(expected, idx5)
tm.assert_index_equal(expected, idx6)

def test_dayfirst_warnings(self):
    """Exercise ``to_datetime`` on delimited dates under each ``dayfirst`` mode.

    Two inputs are covered: one where every string is DD/MM/YYYY, and one
    that mixes DD/MM/YYYY with MM/DD/YYYY.  For each input the result is
    compared against ``expected`` with ``dayfirst=True``, ``dayfirst=False``,
    the default ``dayfirst`` argument, and ``infer_datetime_format=True``.
    """
    # GH 12585
    # NOTE(review): despite the test name, only the parsed values are checked;
    # none of the steps below assert that a warning is (or is not) emitted.
    # Consider wrapping them in tm.assert_produces_warning once the warning
    # category/stacklevel are settled.

    # CASE 1: valid input
    arr = ["31/12/2014", "10/03/2011"]
    expected = DatetimeIndex(
        ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None
    )

    # A. dayfirst arg correct, no warning
    res1 = to_datetime(arr, dayfirst=True)
    tm.assert_index_equal(expected, res1)

    # B. dayfirst arg incorrect, warning + incorrect output
    # ("10/03/2011" is read month-first, so the result must differ)
    res2 = to_datetime(arr, dayfirst=False)
    with pytest.raises(AssertionError):
        tm.assert_index_equal(expected, res2)

    # C. dayfirst default arg, same as B
    # Deliberately omit ``dayfirst`` so the default code path is exercised
    # instead of repeating step B verbatim.
    res3 = to_datetime(arr)
    with pytest.raises(AssertionError):
        tm.assert_index_equal(expected, res3)

    # D. infer_datetime_format=True overrides dayfirst default
    # no warning + correct result
    res4 = to_datetime(arr, infer_datetime_format=True)
    tm.assert_index_equal(expected, res4)

    # CASE 2: invalid input
    # cannot consistently process with single format
    # warnings *always* raised

    arr = ["31/12/2014", "03/30/2011"]
    # first in DD/MM/YYYY, second in MM/DD/YYYY
    # Each string has one field greater than 12, so only one day/month
    # reading is valid per element and every mode yields the same dates.
    expected = DatetimeIndex(
        ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None
    )

    # A. use dayfirst=True
    res5 = to_datetime(arr, dayfirst=True)
    tm.assert_index_equal(expected, res5)

    # B. use dayfirst=False
    res6 = to_datetime(arr, dayfirst=False)
    tm.assert_index_equal(expected, res6)

    # C. use dayfirst default arg, same as B
    # Again omit ``dayfirst`` so this step really covers the default.
    res7 = to_datetime(arr)
    tm.assert_index_equal(expected, res7)

    # D. use infer_datetime_format=True
    res8 = to_datetime(arr, infer_datetime_format=True)
    tm.assert_index_equal(expected, res8)

@pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray])
def test_to_datetime_dta_tz(self, klass):
# GH#27733
Expand Down