diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 56b09348189ee..2fed373a5c79b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -727,6 +727,7 @@ Other Deprecations - Deprecated the ``display.column_space`` global configuration option (:issue:`7576`) - Deprecated the argument ``na_sentinel`` in :func:`factorize`, :meth:`Index.factorize`, and :meth:`.ExtensionArray.factorize`; pass ``use_na_sentinel=True`` instead to use the sentinel ``-1`` for NaN values and ``use_na_sentinel=False`` instead of ``na_sentinel=None`` to encode NaN values (:issue:`46910`) - Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`) +- Clarified warning from :func:`to_datetime` when delimited dates can't be parsed in accordance with the specified ``dayfirst`` argument (:issue:`46210`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 8b42ed195957b..5cb11436f6f45 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -85,8 +85,9 @@ _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) PARSING_WARNING_MSG = ( - "Parsing '{date_string}' in {format} format. Provide format " - "or specify infer_datetime_format=True for consistent parsing." + "Parsing dates in {format} format when dayfirst={dayfirst} was specified. " + "This may lead to inconsistently parsed dates! Specify a format " + "to ensure consistent parsing." 
) cdef: @@ -185,16 +186,16 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if dayfirst and not swapped_day_and_month: warnings.warn( PARSING_WARNING_MSG.format( - date_string=date_string, - format='MM/DD/YYYY' + format='MM/DD/YYYY', + dayfirst='True', ), stacklevel=4, ) elif not dayfirst and swapped_day_and_month: warnings.warn( PARSING_WARNING_MSG.format( - date_string=date_string, - format='DD/MM/YYYY' + format='DD/MM/YYYY', + dayfirst='False (the default)', ), stacklevel=4, ) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 449d5a954613b..d05961b702c51 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1677,9 +1677,7 @@ def test_parse_delimited_date_swap_with_warning( ): parser = all_parsers expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") - warning_msg = ( - "Provide format or specify infer_datetime_format=True for consistent parsing" - ) + warning_msg = "Specify a format to ensure consistent parsing" with tm.assert_produces_warning(UserWarning, match=warning_msg): result = parser.read_csv( StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0] @@ -1687,6 +1685,17 @@ def test_parse_delimited_date_swap_with_warning( tm.assert_frame_equal(result, expected) +def test_parse_multiple_delimited_dates_with_swap_warnings(): + # GH46210 + warning_msg = "Specify a format to ensure consistent parsing" + with tm.assert_produces_warning(UserWarning, match=warning_msg) as record: + pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"]) + assert len({str(warning.message) for warning in record}) == 1 + # Using set(record) as repetitions of the same warning are suppressed + # https://docs.python.org/3/library/warnings.html + # and here we care to check that the warning is only shown once to users. 
+ + def _helper_hypothesis_delimited_date(call, date_string, **kwargs): msg, result = None, None try: @@ -1848,12 +1857,14 @@ def test_parse_dates_and_keep_orgin_column(all_parsers): def test_dayfirst_warnings(): # GH 12585 warning_msg_day_first = ( - "Parsing '31/12/2014' in DD/MM/YYYY format. Provide " - "format or specify infer_datetime_format=True for consistent parsing." + r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) was " + r"specified. This may lead to inconsistently parsed dates! Specify a format " + r"to ensure consistent parsing." ) warning_msg_month_first = ( - "Parsing '03/30/2011' in MM/DD/YYYY format. Provide " - "format or specify infer_datetime_format=True for consistent parsing." + "Parsing dates in MM/DD/YYYY format when dayfirst=True was " + "specified. This may lead to inconsistently parsed dates! Specify a format " + "to ensure consistent parsing." ) # CASE 1: valid input diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index 6aa4ddfac7628..5629830767c3c 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser): ) with tm.assert_produces_warning( - UserWarning, match="Parsing '31/12/2020' in DD/MM/YYYY format" + UserWarning, match="Parsing dates in DD/MM/YYYY format" ): df_result = read_xml(xml, parse_dates=["date"], parser=parser) df_iter = read_xml_iterparse( diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4c34b0c0aec0a..f712b4a24e5e5 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1964,8 +1964,9 @@ def test_dayfirst(self, cache): def test_dayfirst_warnings_valid_input(self): # GH 12585 warning_msg_day_first = ( - "Parsing '31/12/2014' in DD/MM/YYYY format. Provide " - "format or specify infer_datetime_format=True for consistent parsing." 
+ r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) " + "was specified. This may lead to inconsistently parsed dates! Specify a " + "format to ensure consistent parsing." ) # CASE 1: valid input @@ -2001,12 +2002,14 @@ def test_dayfirst_warnings_invalid_input(self): # cannot consistently process with single format # warnings *always* raised warning_msg_day_first = ( - "Parsing '31/12/2014' in DD/MM/YYYY format. Provide " - "format or specify infer_datetime_format=True for consistent parsing." + r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) " + "was specified. This may lead to inconsistently parsed dates! Specify a " + "format to ensure consistent parsing." ) warning_msg_month_first = ( - "Parsing '03/30/2011' in MM/DD/YYYY format. Provide " - "format or specify infer_datetime_format=True for consistent parsing." + r"Parsing dates in MM/DD/YYYY format when dayfirst=True " + "was specified. This may lead to inconsistently parsed dates! Specify a " + "format to ensure consistent parsing." ) arr = ["31/12/2014", "03/30/2011"]