Skip to content

WARN: Clarify datetime warning when can't parse in accordance with dayfirst #47528

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 28, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ Other Deprecations
- Deprecated the ``display.column_space`` global configuration option (:issue:`7576`)
- Deprecated the argument ``na_sentinel`` in :func:`factorize`, :meth:`Index.factorize`, and :meth:`.ExtensionArray.factorize`; pass ``use_na_sentinel=True`` instead to use the sentinel ``-1`` for NaN values and ``use_na_sentinel=False`` instead of ``na_sentinel=None`` to encode NaN values (:issue:`46910`)
- Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`)
- Clarified warning from :func:`to_datetime` when delimited dates can't be parsed in accordance with the specified ``dayfirst`` argument (:issue:`46210`)


.. ---------------------------------------------------------------------------
Expand Down
13 changes: 7 additions & 6 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
second=0, microsecond=0)

PARSING_WARNING_MSG = (
"Parsing '{date_string}' in {format} format. Provide format "
"or specify infer_datetime_format=True for consistent parsing."
"Parsing dates in {format} format when dayfirst={dayfirst} was specified. "
"This may lead to inconsistently parsed dates! Specify a format "
"to ensure consistent parsing."
)

cdef:
Expand Down Expand Up @@ -185,16 +186,16 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
if dayfirst and not swapped_day_and_month:
warnings.warn(
PARSING_WARNING_MSG.format(
date_string=date_string,
format='MM/DD/YYYY'
format='MM/DD/YYYY',
dayfirst='True',
),
stacklevel=4,
)
elif not dayfirst and swapped_day_and_month:
warnings.warn(
PARSING_WARNING_MSG.format(
date_string=date_string,
format='DD/MM/YYYY'
format='DD/MM/YYYY',
dayfirst='False (the default)',
),
stacklevel=4,
)
Expand Down
25 changes: 18 additions & 7 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1677,16 +1677,25 @@ def test_parse_delimited_date_swap_with_warning(
):
parser = all_parsers
expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
warning_msg = (
"Provide format or specify infer_datetime_format=True for consistent parsing"
)
warning_msg = "Specify a format to ensure consistent parsing"
with tm.assert_produces_warning(UserWarning, match=warning_msg):
result = parser.read_csv(
StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0]
)
tm.assert_frame_equal(result, expected)


def test_parse_multiple_delimited_dates_with_swap_warnings():
# GH46210
# Parsing several delimited dates in a single to_datetime call, with no
# explicit format, must surface just one distinct consistency warning.
warning_msg = "Specify a format to ensure consistent parsing"
with tm.assert_produces_warning(UserWarning, match=warning_msg) as record:
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
assert len({str(warning.message) for warning in record}) == 1
# The check collapses the recorded warnings into a set of message strings
# instead of counting entries in `record`, because repetitions of the same
# warning are suppressed when shown to users:
# https://docs.python.org/3/library/warnings.html
# What matters here is that the warning is only shown once to users.


def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
msg, result = None, None
try:
Expand Down Expand Up @@ -1848,12 +1857,14 @@ def test_parse_dates_and_keep_orgin_column(all_parsers):
def test_dayfirst_warnings():
# GH 12585
warning_msg_day_first = (
"Parsing '31/12/2014' in DD/MM/YYYY format. Provide "
"format or specify infer_datetime_format=True for consistent parsing."
r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) was "
r"specified. This may lead to inconsistently parsed dates! Specify a format "
r"to ensure consistent parsing."
)
warning_msg_month_first = (
"Parsing '03/30/2011' in MM/DD/YYYY format. Provide "
"format or specify infer_datetime_format=True for consistent parsing."
"Parsing dates in MM/DD/YYYY format when dayfirst=True was "
"specified. This may lead to inconsistently parsed dates! Specify a format "
"to ensure consistent parsing."
)

# CASE 1: valid input
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/xml/test_xml_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser):
)

with tm.assert_produces_warning(
UserWarning, match="Parsing '31/12/2020' in DD/MM/YYYY format"
UserWarning, match="Parsing dates in DD/MM/YYYY format"
):
df_result = read_xml(xml, parse_dates=["date"], parser=parser)
df_iter = read_xml_iterparse(
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1964,8 +1964,9 @@ def test_dayfirst(self, cache):
def test_dayfirst_warnings_valid_input(self):
# GH 12585
warning_msg_day_first = (
"Parsing '31/12/2014' in DD/MM/YYYY format. Provide "
"format or specify infer_datetime_format=True for consistent parsing."
r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) "
"was specified. This may lead to inconsistently parsed dates! Specify a "
"format to ensure consistent parsing."
)

# CASE 1: valid input
Expand Down