Skip to content

Commit fd1f643

Browse files
authored
WARN: Clarify datetime warning when can't parse in accordance with dayfirst (#47528)
* try improving * WARN: clarify warning message from to_datetime when dayfirst can't be respected * add test to count number of warnings emitted * fix whatsnew note * fixup test to use set * update tests * inconsistently-parsed -> inconsistently parsed * remove date_string from .format call * fixup other tests
1 parent cfc7234 commit fd1f643

File tree

5 files changed

+36
-20
lines changed

5 files changed

+36
-20
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,7 @@ Other Deprecations
727727
- Deprecated the ``display.column_space`` global configuration option (:issue:`7576`)
728728
- Deprecated the argument ``na_sentinel`` in :func:`factorize`, :meth:`Index.factorize`, and :meth:`.ExtensionArray.factorize`; pass ``use_na_sentinel=True`` instead to use the sentinel ``-1`` for NaN values and ``use_na_sentinel=False`` instead of ``na_sentinel=None`` to encode NaN values (:issue:`46910`)
729729
- Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`)
730+
- Clarified warning from :func:`to_datetime` when delimited dates can't be parsed in accordance with the specified ``dayfirst`` argument (:issue:`46210`)
730731

731732

732733
.. ---------------------------------------------------------------------------

pandas/_libs/tslibs/parsing.pyx

+7-6
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,9 @@ _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
8585
second=0, microsecond=0)
8686

8787
PARSING_WARNING_MSG = (
88-
"Parsing '{date_string}' in {format} format. Provide format "
89-
"or specify infer_datetime_format=True for consistent parsing."
88+
"Parsing dates in {format} format when dayfirst={dayfirst} was specified. "
89+
"This may lead to inconsistently parsed dates! Specify a format "
90+
"to ensure consistent parsing."
9091
)
9192

9293
cdef:
@@ -185,16 +186,16 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst):
185186
if dayfirst and not swapped_day_and_month:
186187
warnings.warn(
187188
PARSING_WARNING_MSG.format(
188-
date_string=date_string,
189-
format='MM/DD/YYYY'
189+
format='MM/DD/YYYY',
190+
dayfirst='True',
190191
),
191192
stacklevel=4,
192193
)
193194
elif not dayfirst and swapped_day_and_month:
194195
warnings.warn(
195196
PARSING_WARNING_MSG.format(
196-
date_string=date_string,
197-
format='DD/MM/YYYY'
197+
format='DD/MM/YYYY',
198+
dayfirst='False (the default)',
198199
),
199200
stacklevel=4,
200201
)

pandas/tests/io/parser/test_parse_dates.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -1677,16 +1677,25 @@ def test_parse_delimited_date_swap_with_warning(
16771677
):
16781678
parser = all_parsers
16791679
expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
1680-
warning_msg = (
1681-
"Provide format or specify infer_datetime_format=True for consistent parsing"
1682-
)
1680+
warning_msg = "Specify a format to ensure consistent parsing"
16831681
with tm.assert_produces_warning(UserWarning, match=warning_msg):
16841682
result = parser.read_csv(
16851683
StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0]
16861684
)
16871685
tm.assert_frame_equal(result, expected)
16881686

16891687

1688+
def test_parse_multiple_delimited_dates_with_swap_warnings():
1689+
# GH46210
1690+
warning_msg = "Specify a format to ensure consistent parsing"
1691+
with tm.assert_produces_warning(UserWarning, match=warning_msg) as record:
1692+
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
1693+
assert len({str(warning.message) for warning in record}) == 1
1694+
# Using a set of the warning messages as repetitions of the same warning are suppressed
1695+
# https://docs.python.org/3/library/warnings.html
1696+
# and here we care to check that the warning is only shown once to users.
1697+
1698+
16901699
def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
16911700
msg, result = None, None
16921701
try:
@@ -1848,12 +1857,14 @@ def test_parse_dates_and_keep_orgin_column(all_parsers):
18481857
def test_dayfirst_warnings():
18491858
# GH 12585
18501859
warning_msg_day_first = (
1851-
"Parsing '31/12/2014' in DD/MM/YYYY format. Provide "
1852-
"format or specify infer_datetime_format=True for consistent parsing."
1860+
r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) was "
1861+
r"specified. This may lead to inconsistently parsed dates! Specify a format "
1862+
r"to ensure consistent parsing."
18531863
)
18541864
warning_msg_month_first = (
1855-
"Parsing '03/30/2011' in MM/DD/YYYY format. Provide "
1856-
"format or specify infer_datetime_format=True for consistent parsing."
1865+
"Parsing dates in MM/DD/YYYY format when dayfirst=True was "
1866+
"specified. This may lead to inconsistently parsed dates! Specify a format "
1867+
"to ensure consistent parsing."
18571868
)
18581869

18591870
# CASE 1: valid input

pandas/tests/io/xml/test_xml_dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser):
457457
)
458458

459459
with tm.assert_produces_warning(
460-
UserWarning, match="Parsing '31/12/2020' in DD/MM/YYYY format"
460+
UserWarning, match="Parsing dates in DD/MM/YYYY format"
461461
):
462462
df_result = read_xml(xml, parse_dates=["date"], parser=parser)
463463
df_iter = read_xml_iterparse(

pandas/tests/tools/test_to_datetime.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -1964,8 +1964,9 @@ def test_dayfirst(self, cache):
19641964
def test_dayfirst_warnings_valid_input(self):
19651965
# GH 12585
19661966
warning_msg_day_first = (
1967-
"Parsing '31/12/2014' in DD/MM/YYYY format. Provide "
1968-
"format or specify infer_datetime_format=True for consistent parsing."
1967+
r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) "
1968+
"was specified. This may lead to inconsistently parsed dates! Specify a "
1969+
"format to ensure consistent parsing."
19691970
)
19701971

19711972
# CASE 1: valid input
@@ -2001,12 +2002,14 @@ def test_dayfirst_warnings_invalid_input(self):
20012002
# cannot consistently process with single format
20022003
# warnings *always* raised
20032004
warning_msg_day_first = (
2004-
"Parsing '31/12/2014' in DD/MM/YYYY format. Provide "
2005-
"format or specify infer_datetime_format=True for consistent parsing."
2005+
r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) "
2006+
"was specified. This may lead to inconsistently parsed dates! Specify a "
2007+
"format to ensure consistent parsing."
20062008
)
20072009
warning_msg_month_first = (
2008-
"Parsing '03/30/2011' in MM/DD/YYYY format. Provide "
2009-
"format or specify infer_datetime_format=True for consistent parsing."
2010+
r"Parsing dates in MM/DD/YYYY format when dayfirst=True "
2011+
"was specified. This may lead to inconsistently parsed dates! Specify a "
2012+
"format to ensure consistent parsing."
20102013
)
20112014

20122015
arr = ["31/12/2014", "03/30/2011"]

0 commit comments

Comments
 (0)