From ea796690dc3bce718cd3dafb78e7bf6bb2612149 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 08:41:08 +0100
Subject: [PATCH 01/34] :wastebasket: deprecate infer_datetime_format, make
 strict

---
 pandas/core/tools/datetimes.py   | 92 ++++++++++++--------------------
 pandas/io/parsers/base_parser.py |  5 --
 pandas/io/parsers/readers.py     | 39 +++++++-------
 3 files changed, 53 insertions(+), 83 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 7791ea804a52a..5760952ba7324 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -19,7 +19,10 @@
 
 import numpy as np
 
-from pandas._libs import tslib
+from pandas._libs import (
+    lib,
+    tslib,
+)
 from pandas._libs.tslibs import (
     OutOfBoundsDatetime,
     Timedelta,
@@ -331,7 +334,6 @@ def _convert_listlike_datetimes(
     tz: Timezone | None = None,
     unit: str | None = None,
     errors: DateTimeErrorChoices = "raise",
-    infer_datetime_format: bool = False,
     dayfirst: bool | None = None,
     yearfirst: bool | None = None,
     exact: bool = True,
@@ -415,27 +417,19 @@ def _convert_listlike_datetimes(
     arg = ensure_object(arg)
     require_iso8601 = False
 
-    if infer_datetime_format and format is None:
+    if format is None:
         format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
 
+    # There is a special fast-path for ISO 8601 formatted
+    # datetime strings, so in those cases drop the (possibly
+    # inferred) format: parsing with an explicit format would
+    # be slower than the fast-path in this special case
+    if format is not None and format_is_iso(format):
+        require_iso8601 = True
+        format = None
     if format is not None:
-        # There is a special fast-path for iso8601 formatted
-        # datetime strings, so in those cases don't use the inferred
-        # format because this path makes process slower in this
-        # special case
-        format_is_iso8601 = format_is_iso(format)
-        if format_is_iso8601:
-            require_iso8601 = not infer_datetime_format
-            format = None
-
-    if format is not None:
-        res = _to_datetime_with_format(
-            arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
-        )
-        if res is not None:
-            return res
+        return _to_datetime_with_format(arg, orig_arg, name, tz, format, exact, errors)
 
-    assert format is None or infer_datetime_format
     utc = tz == "utc"
     result, tz_parsed = objects_to_datetime64ns(
         arg,
@@ -464,8 +458,7 @@ def _array_strptime_with_fallback(
     fmt: str,
     exact: bool,
     errors: str,
-    infer_datetime_format: bool,
-) -> Index | None:
+) -> Index:
     """
     Call array_strptime, with fallback behavior depending on 'errors'.
     """
@@ -486,18 +479,14 @@ def _array_strptime_with_fallback(
         # if fmt was inferred, try falling back
         # to array_to_datetime - terminate here
         # for specified formats
-        if not infer_datetime_format:
-            if errors == "raise":
-                raise
-            elif errors == "coerce":
-                result = np.empty(arg.shape, dtype="M8[ns]")
-                iresult = result.view("i8")
-                iresult.fill(iNaT)
-            else:
-                result = arg
+        if errors == "raise":
+            raise
+        elif errors == "coerce":
+            result = np.empty(arg.shape, dtype="M8[ns]")
+            iresult = result.view("i8")
+            iresult.fill(iNaT)
         else:
-            # Indicates to the caller to fallback to objects_to_datetime64ns
-            return None
+            result = arg
     else:
         if "%Z" in fmt or "%z" in fmt:
             return _return_parsed_timezone_results(result, timezones, tz, name)
@@ -513,10 +502,9 @@ def _to_datetime_with_format(
     fmt: str,
     exact: bool,
     errors: str,
-    infer_datetime_format: bool,
-) -> Index | None:
+) -> Index:
     """
-    Try parsing with the given format, returning None on failure.
+    Try parsing with the given format.
     """
     result = None
 
@@ -537,9 +525,7 @@ def _to_datetime_with_format(
             return _box_as_indexlike(result, utc=utc, name=name)
 
     # fallback
-    res = _array_strptime_with_fallback(
-        arg, name, tz, fmt, exact, errors, infer_datetime_format
-    )
+    res = _array_strptime_with_fallback(arg, name, tz, fmt, exact, errors)
     return res
 
 
@@ -713,7 +699,7 @@ def to_datetime(
     format: str | None = None,
     exact: bool = True,
     unit: str | None = None,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: lib.NoDefault | bool = lib.no_default,
     origin: str = "unix",
     cache: bool = True,
 ) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None:
@@ -926,24 +912,6 @@ def to_datetime(
     1   2016-03-05
     dtype: datetime64[ns]
 
-    Passing ``infer_datetime_format=True`` can often-times speedup a parsing
-    if its not an ISO8601 format exactly, but in a regular format.
-
-    >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
-    >>> s.head()
-    0    3/11/2000
-    1    3/12/2000
-    2    3/13/2000
-    3    3/11/2000
-    4    3/12/2000
-    dtype: object
-
-    >>> %timeit pd.to_datetime(s, infer_datetime_format=True)  # doctest: +SKIP
-    100 loops, best of 3: 10.4 ms per loop
-
-    >>> %timeit pd.to_datetime(s, infer_datetime_format=False)  # doctest: +SKIP
-    1 loop, best of 3: 471 ms per loop
-
     Using a unix epoch time
 
     >>> pd.to_datetime(1490195805, unit='s')
@@ -1060,6 +1028,15 @@ def to_datetime(
                    '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
                   dtype='datetime64[ns, UTC]', freq=None)
     """
+    if infer_datetime_format is not lib.no_default:
+        warnings.warn(
+            "The argument 'infer_datetime_format' is deprecated and will "
+            "be removed in a future version. "
+            "A strict version of it is now the default, see "
+            "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
+            "You can safely remove this argument.",
+            stacklevel=find_stack_level(),
+        )
     if arg is None:
         return None
 
@@ -1075,7 +1052,6 @@ def to_datetime(
         yearfirst=yearfirst,
         errors=errors,
         exact=exact,
-        infer_datetime_format=infer_datetime_format,
     )
 
     result: Timestamp | NaTType | Series | Index
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 45f6469a31f4f..5080c15153ced 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -122,13 +122,11 @@ def __init__(self, kwds) -> None:
         self.true_values = kwds.get("true_values")
         self.false_values = kwds.get("false_values")
         self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True)
-        self.infer_datetime_format = kwds.pop("infer_datetime_format", False)
         self.cache_dates = kwds.pop("cache_dates", True)
 
         self._date_conv = _make_date_converter(
             date_parser=self.date_parser,
             dayfirst=self.dayfirst,
-            infer_datetime_format=self.infer_datetime_format,
             cache_dates=self.cache_dates,
         )
 
@@ -1105,7 +1103,6 @@ def _get_empty_meta(
 def _make_date_converter(
     date_parser=None,
     dayfirst: bool = False,
-    infer_datetime_format: bool = False,
     cache_dates: bool = True,
 ):
     def converter(*date_cols):
@@ -1118,7 +1115,6 @@ def converter(*date_cols):
                     utc=None,
                     dayfirst=dayfirst,
                     errors="ignore",
-                    infer_datetime_format=infer_datetime_format,
                     cache=cache_dates,
                 ).to_numpy()
 
@@ -1188,7 +1184,6 @@ def converter(*date_cols):
     "squeeze": None,
     "compression": None,
     "mangle_dupe_cols": True,
-    "infer_datetime_format": False,
     "skip_blank_lines": True,
     "encoding_errors": "strict",
     "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index c1698c68ce465..6ed73bd1de1e8 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -262,11 +262,6 @@
     :ref:`io.csv.mixed_timezones` for more.
 
     Note: A fast-path exists for iso8601-formatted dates.
-infer_datetime_format : bool, default False
-    If True and `parse_dates` is enabled, pandas will attempt to infer the
-    format of the datetime strings in the columns, and if it can be inferred,
-    switch to a faster method of parsing them. In some cases this can increase
-    the parsing speed by 5-10x.
 keep_date_col : bool, default False
     If True and `parse_dates` specifies combining multiple columns then
     keep the original columns.
@@ -483,7 +478,6 @@
     "decimal",
     "iterator",
     "dayfirst",
-    "infer_datetime_format",
     "verbose",
     "skipinitialspace",
     "low_memory",
@@ -648,7 +642,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -709,7 +703,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -770,7 +764,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -831,7 +825,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -905,7 +899,7 @@ def read_csv(
     skip_blank_lines: bool = True,
     # Datetime Handling
     parse_dates: bool | Sequence[Hashable] | None = None,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: bool | lib.NoDefault = lib.no_default,
     keep_date_col: bool = False,
     date_parser=None,
     dayfirst: bool = False,
@@ -940,6 +934,15 @@ def read_csv(
     storage_options: StorageOptions = None,
     use_nullable_dtypes: bool = False,
 ) -> DataFrame | TextFileReader:
+    if infer_datetime_format is not lib.no_default:
+        warnings.warn(
+            "The argument 'infer_datetime_format' is deprecated and will "
+            "be removed in a future version. "
+            "A strict version of it is now the default, see "
+            "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
+            "You can safely remove this argument.",
+            stacklevel=find_stack_level(),
+        )
     # locals() should never be modified
     kwds = locals().copy()
     del kwds["filepath_or_buffer"]
@@ -992,7 +995,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1053,7 +1056,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1114,7 +1117,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1175,7 +1178,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1249,7 +1252,7 @@ def read_table(
     skip_blank_lines: bool = True,
     # Datetime Handling
     parse_dates: bool | Sequence[Hashable] = False,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: bool | lib.NoDefault = lib.no_default,
     keep_date_col: bool = False,
     date_parser=None,
     dayfirst: bool = False,
@@ -1883,10 +1886,6 @@ def TextParser(*args, **kwds) -> TextFileReader:
         Encoding to use for UTF when reading/writing (ex. 'utf-8')
     squeeze : bool, default False
         returns Series if only one column.
-    infer_datetime_format: bool, default False
-        If True and `parse_dates` is True for a column, try to infer the
-        datetime format based on the first datetime string. If the format
-        can be inferred, there often will be a large parsing speed-up.
     float_precision : str, optional
         Specifies which converter the C engine should use for floating-point
         values. The options are `None` or `high` for the ordinary converter,

From bb68cc3526abbb60009e5fcaab23897e28769376 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 08:43:02 +0100
Subject: [PATCH 02/34] :rotating_light: add warning about dayfirst

---
 pandas/_libs/tslibs/parsing.pyx | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 5c93edfee79f2..74de3502b73de 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1088,6 +1088,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
     # rebuild string, capturing any inferred padding
     dt_str = ''.join(tokens)
     if parsed_datetime.strftime(guessed_format) == dt_str:
+        _maybe_warn_about_dayfirst(guessed_format, dayfirst)
         return guessed_format
     else:
         return None
@@ -1106,6 +1107,26 @@ cdef str _fill_token(token: str, padding: int):
         token_filled = f'{seconds}.{nanoseconds}'
     return token_filled
 
+cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
+    """Warn if guessed datetime format doesn't respect dayfirst argument."""
+    cdef:
+        int day_index = format.find('%d')
+        int month_index = format.find('%m')
+
+    if (day_index != -1) and (month_index != -1):
+        if (day_index > month_index) and dayfirst:
+            warnings.warn(
+                f"Parsing dates in {format} format when dayfirst=True was specified. "
+                f"Pass `dayfirst=False` or specify a format to silence this warning.",
+                stacklevel=find_stack_level(),
+            )
+        if (day_index < month_index) and not dayfirst:
+            warnings.warn(
+                f"Parsing dates in {format} format when dayfirst=False was specified. "
+                f"Pass `dayfirst=True` or specify a format to silence this warning.",
+                stacklevel=find_stack_level(),
+            )
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers):

From 82266f45d2014cb85d1ff08e53e1de4dcdac32a7 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 08:48:37 +0100
Subject: [PATCH 03/34] :white_check_mark: add/update tests

---
 pandas/tests/apply/test_frame_apply.py        |   3 +-
 pandas/tests/frame/methods/test_drop.py       |  10 +-
 pandas/tests/frame/methods/test_to_csv.py     |  10 +-
 .../indexes/datetimes/test_constructors.py    |  12 +-
 pandas/tests/indexes/test_base.py             |  12 +-
 .../io/parser/common/test_common_basic.py     |   4 +-
 pandas/tests/io/parser/test_parse_dates.py    | 101 ++++--------
 .../io/parser/usecols/test_parse_dates.py     |   8 +-
 pandas/tests/io/test_sql.py                   |   4 +-
 pandas/tests/io/xml/test_xml_dtypes.py        |   2 +-
 pandas/tests/plotting/test_converter.py       |   4 +-
 pandas/tests/series/methods/test_to_csv.py    |   6 +-
 pandas/tests/tools/test_to_datetime.py        | 148 +++++++-----------
 pandas/tests/tslibs/test_parsing.py           |  18 +--
 14 files changed, 138 insertions(+), 204 deletions(-)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 3bcb7d964fad1..28a9871b76985 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -836,7 +836,8 @@ def test_with_dictlike_columns_with_datetime():
     df["author"] = ["X", "Y", "Z"]
     df["publisher"] = ["BBC", "NBC", "N24"]
     df["date"] = pd.to_datetime(
-        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"]
+        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"],
+        dayfirst=True,
     )
     result = df.apply(lambda x: {}, axis=1)
     expected = Series([{}, {}, {}])
diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
index 6e5b97af7c297..1b295fd10c9d5 100644
--- a/pandas/tests/frame/methods/test_drop.py
+++ b/pandas/tests/frame/methods/test_drop.py
@@ -405,11 +405,11 @@ def test_drop_level_nonunique_datetime(self):
         idx = Index([2, 3, 4, 4, 5], name="id")
         idxdt = pd.to_datetime(
             [
-                "201603231400",
-                "201603231500",
-                "201603231600",
-                "201603231600",
-                "201603231700",
+                "2016-03-23 14:00",
+                "2016-03-23 15:00",
+                "2016-03-23 16:00",
+                "2016-03-23 16:00",
+                "2016-03-23 17:00",
             ]
         )
         df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 1933278efb443..3b4dec8bff7f1 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -27,7 +27,7 @@
 
 class TestDataFrameToCSV:
     def read_csv(self, path, **kwargs):
-        params = {"index_col": 0, "parse_dates": True}
+        params = {"index_col": 0}
         params.update(**kwargs)
 
         return read_csv(path, **params)
@@ -46,17 +46,17 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame):
             # freq does not roundtrip
             datetime_frame.index = datetime_frame.index._with_freq(None)
             datetime_frame.to_csv(path)
-            recons = self.read_csv(path)
+            recons = self.read_csv(path, parse_dates=True)
             tm.assert_frame_equal(datetime_frame, recons)
 
             datetime_frame.to_csv(path, index_label="index")
-            recons = self.read_csv(path, index_col=None)
+            recons = self.read_csv(path, index_col=None, parse_dates=True)
 
             assert len(recons.columns) == len(datetime_frame.columns) + 1
 
             # no index
             datetime_frame.to_csv(path, index=False)
-            recons = self.read_csv(path, index_col=None)
+            recons = self.read_csv(path, index_col=None, parse_dates=True)
             tm.assert_almost_equal(datetime_frame.values, recons.values)
 
             # corner case
@@ -1056,7 +1056,7 @@ def test_to_csv_date_format(self, datetime_frame):
 
             # test NaTs
             nat_index = to_datetime(
-                ["NaT"] * 10 + ["2000-01-01", "1/1/2000", "1-1-2000"]
+                ["NaT"] * 10 + ["2000-01-01", "2000-01-01", "2000-01-01"]
             )
             nat_frame = DataFrame({"A": nat_index}, index=nat_index)
             nat_frame.to_csv(path, date_format="%Y-%m-%d")
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
index 9914f4357cee4..c1039728f5b5e 100644
--- a/pandas/tests/indexes/datetimes/test_constructors.py
+++ b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -1042,10 +1042,18 @@ def test_datetimeindex_constructor_misc(self):
         arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
         idx4 = DatetimeIndex(arr)
 
-        arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
+        # Can't be parsed consistently, need to parse each element individually
+        arr = [
+            to_datetime(date_string)
+            for date_string in ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
+        ]
         idx5 = DatetimeIndex(arr)
 
-        arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
+        # Can't be parsed consistently, need to parse each element individually
+        arr = [
+            to_datetime(date_string)
+            for date_string in ["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]
+        ]
         idx6 = DatetimeIndex(arr)
 
         idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 4b0821a50e09b..56ef410b4d94e 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1185,10 +1185,16 @@ def test_equals_op_index_vs_mi_same_length(self):
         expected = np.array([False, False, False])
         tm.assert_numpy_array_equal(result, expected)
 
-    @pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta])
-    def test_dt_conversion_preserves_name(self, dt_conv):
+    @pytest.mark.parametrize(
+        "dt_conv, arg",
+        [
+            (pd.to_datetime, ["2000-01-01", "2000-01-02"]),
+            (pd.to_timedelta, ["01:02:03", "01:02:04"]),
+        ],
+    )
+    def test_dt_conversion_preserves_name(self, dt_conv, arg):
         # GH 10875
-        index = Index(["01:02:03", "01:02:04"], name="label")
+        index = Index(arg, name="label")
         assert index.name == dt_conv(index).name
 
     def test_cached_properties_not_settable(self):
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index 52d8abe76ecbc..e7c4066b13640 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -58,8 +58,8 @@ def _set_noconvert_columns(self):
             return CParserWrapper._set_noconvert_columns(self)
 
     data = """a,b,c,d,e
-0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
 
     parse_dates = [[1, 2]]
     cols = {
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 9c8809b6099f9..b8d515a67b7fe 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -1666,9 +1666,9 @@ def test_parse_delimited_date_swap_no_warning(
 @pytest.mark.parametrize(
     "date_string,dayfirst,expected",
     [
-        # %d/%m/%Y; month > 12 thus replacement
+        # %d/%m/%Y; day > 12, so it cannot be read month-first
         ("13/02/2019", False, datetime(2019, 2, 13)),
-        # %m/%d/%Y; day > 12 thus there will be no replacement
+        # %m/%d/%Y; day > 12
         ("02/13/2019", True, datetime(2019, 2, 13)),
     ],
 )
@@ -1677,7 +1677,10 @@ def test_parse_delimited_date_swap_with_warning(
 ):
     parser = all_parsers
     expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
-    warning_msg = "Specify a format to ensure consistent parsing"
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
+    )
     result = parser.read_csv_check_warnings(
         UserWarning,
         warning_msg,
@@ -1691,13 +1694,11 @@ def test_parse_delimited_date_swap_with_warning(
 
 def test_parse_multiple_delimited_dates_with_swap_warnings():
     # GH46210
-    warning_msg = "Specify a format to ensure consistent parsing"
-    with tm.assert_produces_warning(UserWarning, match=warning_msg) as record:
+    with pytest.raises(
+        ValueError,
+        match=r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\)$",
+    ):
         pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
-    assert len({str(warning.message) for warning in record}) == 1
-    # Using set(record) as repetitions of the same warning are suppressed
-    # https://docs.python.org/3/library/warnings.html
-    # and here we care to check that the warning is only shows once to users.
 
 
 def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
@@ -1860,97 +1861,51 @@ def test_parse_dates_and_keep_orgin_column(all_parsers):
 
 def test_dayfirst_warnings():
     # GH 12585
-    warning_msg_day_first = (
-        r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) was "
-        r"specified. This may lead to inconsistently parsed dates! Specify a format "
-        r"to ensure consistent parsing."
-    )
-    warning_msg_month_first = (
-        "Parsing dates in MM/DD/YYYY format when dayfirst=True was "
-        "specified. This may lead to inconsistently parsed dates! Specify a format "
-        "to ensure consistent parsing."
-    )
 
     # CASE 1: valid input
     input = "date\n31/12/2014\n10/03/2011"
-    expected_consistent = DatetimeIndex(
+    expected = DatetimeIndex(
         ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date"
     )
-    expected_inconsistent = DatetimeIndex(
-        ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date"
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
     )
 
     # A. dayfirst arg correct, no warning
     res1 = read_csv(
         StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
     ).index
-    tm.assert_index_equal(expected_consistent, res1)
+    tm.assert_index_equal(expected, res1)
 
-    # B. dayfirst arg incorrect, warning + incorrect output
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+    # B. dayfirst arg incorrect, warning
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res2 = read_csv(
             StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
         ).index
-    tm.assert_index_equal(expected_inconsistent, res2)
-
-    # C. dayfirst default arg, same as B
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res3 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
-        ).index
-    tm.assert_index_equal(expected_inconsistent, res3)
-
-    # D. infer_datetime_format=True overrides dayfirst default
-    # no warning + correct result
-    res4 = read_csv(
-        StringIO(input),
-        parse_dates=["date"],
-        infer_datetime_format=True,
-        index_col="date",
-    ).index
-    tm.assert_index_equal(expected_consistent, res4)
+    tm.assert_index_equal(expected, res2)
 
     # CASE 2: invalid input
     # cannot consistently process with single format
-    # warnings *always* raised
+    # return to user unaltered
 
     # first in DD/MM/YYYY, second in MM/DD/YYYY
     input = "date\n31/12/2014\n03/30/2011"
-    expected = DatetimeIndex(
-        ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date"
-    )
+    expected = Index(["31/12/2014", "03/30/2011"], dtype="object", name="date")
 
     # A. use dayfirst=True
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
-        res5 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
-        ).index
+    res5 = read_csv(
+        StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
+    ).index
     tm.assert_index_equal(expected, res5)
 
     # B. use dayfirst=False
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res6 = read_csv(
             StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
         ).index
     tm.assert_index_equal(expected, res6)
 
-    # C. use dayfirst default arg, same as B
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res7 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
-        ).index
-    tm.assert_index_equal(expected, res7)
-
-    # D. use infer_datetime_format=True
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res8 = read_csv(
-            StringIO(input),
-            parse_dates=["date"],
-            infer_datetime_format=True,
-            index_col="date",
-        ).index
-    tm.assert_index_equal(expected, res8)
-
 
 @pytest.mark.parametrize(
     "date_string, dayfirst",
@@ -1973,9 +1928,11 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst):
     expected = DatetimeIndex(
         ["2014-01-31"], dtype="datetime64[ns]", freq=None, name="date"
     )
-    with tm.assert_produces_warning(
-        UserWarning, match=r"may lead to inconsistently parsed dates"
-    ):
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
+    )
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res = read_csv(
             StringIO(initial_value),
             parse_dates=["date"],
diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py
index 50000dab8a7aa..6d40435a4107e 100644
--- a/pandas/tests/io/parser/usecols/test_parse_dates.py
+++ b/pandas/tests/io/parser/usecols/test_parse_dates.py
@@ -31,8 +31,8 @@
 def test_usecols_with_parse_dates(all_parsers, usecols):
     # see gh-9755
     data = """a,b,c,d,e
-0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
     parser = all_parsers
     parse_dates = [[1, 2]]
 
@@ -138,8 +138,8 @@ def test_usecols_with_parse_dates4(all_parsers):
 )
 def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
     # see gh-9755
-    s = """0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+    s = """0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
     parse_dates = [[1, 2]]
     parser = all_parsers
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 9adada8afb2c2..129d6f89fd019 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -1386,7 +1386,7 @@ def test_sqlalchemy_type_mapping(self):
 
         # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
         df = DataFrame(
-            {"time": to_datetime(["201412120154", "201412110254"], utc=True)}
+            {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
         )
         db = sql.SQLDatabase(self.conn)
         table = sql.SQLTable("test_type", db, frame=df)
@@ -1595,7 +1595,7 @@ def test_sqlite_type_mapping(self):
 
         # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
         df = DataFrame(
-            {"time": to_datetime(["201412120154", "201412110254"], utc=True)}
+            {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
         )
         db = sql.SQLiteDatabase(self.conn)
         table = sql.SQLiteTable("test_type", db, frame=df)
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index 5629830767c3c..7b2ffbc7cda5e 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser):
     )
 
     with tm.assert_produces_warning(
-        UserWarning, match="Parsing dates in DD/MM/YYYY format"
+        UserWarning, match="Parsing dates in %d/%m/%Y format"
     ):
         df_result = read_xml(xml, parse_dates=["date"], parser=parser)
         df_iter = read_xml_iterparse(
diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
index 9a6fed1afad1f..87d5aaf0c3205 100644
--- a/pandas/tests/plotting/test_converter.py
+++ b/pandas/tests/plotting/test_converter.py
@@ -161,8 +161,8 @@ def dtc(self):
         return converter.DatetimeConverter()
 
     def test_convert_accepts_unicode(self, dtc):
-        r1 = dtc.convert("12:22", None, None)
-        r2 = dtc.convert("12:22", None, None)
+        r1 = dtc.convert("2000-01-01 12:22", None, None)
+        r2 = dtc.convert("2000-01-01 12:22", None, None)
         assert r1 == r2, "DatetimeConverter.convert should accept unicode"
 
     def test_conversion(self, dtc):
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
index 28519fc9b529f..7827483644634 100644
--- a/pandas/tests/series/methods/test_to_csv.py
+++ b/pandas/tests/series/methods/test_to_csv.py
@@ -13,7 +13,7 @@
 
 class TestSeriesToCSV:
     def read_csv(self, path, **kwargs):
-        params = {"index_col": 0, "header": None, "parse_dates": True}
+        params = {"index_col": 0, "header": None}
         params.update(**kwargs)
 
         header = params.get("header")
@@ -30,7 +30,7 @@ def test_from_csv(self, datetime_series, string_series):
 
         with tm.ensure_clean() as path:
             datetime_series.to_csv(path, header=False)
-            ts = self.read_csv(path)
+            ts = self.read_csv(path, parse_dates=True)
             tm.assert_series_equal(datetime_series, ts, check_names=False)
 
             assert ts.name is None
@@ -55,7 +55,7 @@ def test_from_csv(self, datetime_series, string_series):
             with open(path, "w") as outfile:
                 outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
 
-            series = self.read_csv(path, sep="|")
+            series = self.read_csv(path, sep="|", parse_dates=True)
             check_series = Series(
                 {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
             )
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index f524bc18793d8..286036440073f 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -219,7 +219,6 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
             ),
             (["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
             (["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
-            (["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])),
             ([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])),
             ([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])),
         ],
@@ -463,14 +462,14 @@ def test_to_datetime_parse_timezone_keeps_name(self):
 class TestToDatetime:
     def test_to_datetime_mixed_datetime_and_string(self):
         # GH#47018 adapted old doctest with new behavior
-        d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
         d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
-        res = to_datetime(["2020-01-01 17:00 -0100", d2])
-        expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60))
-        tm.assert_index_equal(res, expected)
+        with pytest.raises(
+            ValueError,
+            match=r"time data '.*' does not match format '%Y-%m-%d %H:%M %z' \(match\)",
+        ):
+            to_datetime(["2020-01-01 17:00 -0100", d2])
 
-    @pytest.mark.parametrize("infer_datetime_format", [True, False])
-    def test_to_datetime_np_str(self, infer_datetime_format):
+    def test_to_datetime_np_str(self):
         # GH#32264
         # GH#48969
         value = np.str_("2019-02-04 10:18:46.297000+0000")
@@ -482,11 +481,11 @@ def test_to_datetime_np_str(self, infer_datetime_format):
         assert to_datetime(value) == exp
         assert to_datetime(ser.iloc[0]) == exp
 
-        res = to_datetime([value], infer_datetime_format=infer_datetime_format)
+        res = to_datetime([value])
         expected = Index([exp])
         tm.assert_index_equal(res, expected)
 
-        res = to_datetime(ser, infer_datetime_format=infer_datetime_format)
+        res = to_datetime(ser)
         expected = Series(expected)
         tm.assert_series_equal(res, expected)
 
@@ -927,7 +926,10 @@ def test_datetime_bool_arrays_mixed(self, cache):
         msg = f"{type(cache)} is not convertible to datetime"
         with pytest.raises(TypeError, match=msg):
             to_datetime([False, datetime.today()], cache=cache)
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(
+            ValueError,
+            match=r"^time data 'True' does not match format '%Y%m%d' \(match\)$",
+        ):
             to_datetime(["20130101", True], cache=cache)
         tm.assert_index_equal(
             to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache),
@@ -1071,8 +1073,7 @@ def test_to_datetime_cache_scalar(self):
                 (None,)
                 + (NaT,) * start_caching_at
                 + ("2012 July 26", Timestamp("2012-07-26")),
-                (NaT,) * (start_caching_at + 1)
-                + (Timestamp("2012-07-26"), Timestamp("2012-07-26")),
+                (NaT,) * (start_caching_at + 1) + (Timestamp("2012-07-26"), NaT),
             ),
         ),
     )
@@ -1153,7 +1154,6 @@ def test_to_datetime_coerce(self):
         )
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize("infer_datetime_format", [True, False])
     @pytest.mark.parametrize(
         "errors, expected",
         [
@@ -1224,15 +1224,18 @@ def test_iso_8601_strings_with_different_offsets_utc(self):
 
     def test_iso8601_strings_mixed_offsets_with_naive(self):
         # GH 24992
-        result = to_datetime(
+        # Can't parse consistently, need to parse each element in loop.
+        result = DatetimeIndex(
             [
-                "2018-11-28T00:00:00",
-                "2018-11-28T00:00:00+12:00",
-                "2018-11-28T00:00:00",
-                "2018-11-28T00:00:00+06:00",
-                "2018-11-28T00:00:00",
-            ],
-            utc=True,
+                to_datetime(string, utc=True)
+                for string in [
+                    "2018-11-28T00:00:00",
+                    "2018-11-28T00:00:00+12:00",
+                    "2018-11-28T00:00:00",
+                    "2018-11-28T00:00:00+06:00",
+                    "2018-11-28T00:00:00",
+                ]
+            ]
         )
         expected = to_datetime(
             [
@@ -1248,9 +1251,10 @@ def test_iso8601_strings_mixed_offsets_with_naive(self):
 
     def test_iso8601_strings_mixed_offsets_with_naive_reversed(self):
         items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"]
-        result = to_datetime(items, utc=True)
-        expected = to_datetime(list(reversed(items)), utc=True)[::-1]
-        tm.assert_index_equal(result, expected)
+        # Can't parse consistently, need to parse each element in loop.
+        result = [to_datetime(item, utc=True) for item in items]
+        expected = [to_datetime(item, utc=True) for item in list(reversed(items))][::-1]
+        assert result == expected
 
     def test_mixed_offsets_with_native_datetime_raises(self):
         # GH 25978
@@ -1778,7 +1782,7 @@ def test_to_datetime_on_datetime64_series(self, cache):
     def test_to_datetime_with_space_in_series(self, cache):
         # GH 6428
         ser = Series(["10/18/2006", "10/18/2008", " "])
-        msg = r"(\(')?String does not contain a date(:', ' '\))?"
+        msg = r"^time data ' ' does not match format '%m/%d/%Y' \(match\)$"
         with pytest.raises(ValueError, match=msg):
             to_datetime(ser, errors="raise", cache=cache)
         result_coerce = to_datetime(ser, errors="coerce", cache=cache)
@@ -1838,7 +1842,7 @@ def test_to_datetime_strings(self, cache):
 
     def test_to_datetime_strings_variation(self, cache):
         array = ["2012", "20120101", "20120101 12:01:01"]
-        expected = list(to_datetime(array, cache=cache))
+        expected = [to_datetime(dt_str, cache=cache) for dt_str in array]
         result = [Timestamp(date_str) for date_str in array]
         tm.assert_almost_equal(result, expected)
 
@@ -1908,7 +1912,10 @@ def test_string_na_nat_conversion(self, cache):
         result = tslib.array_to_datetime(strings)[0]
         tm.assert_almost_equal(result, expected)
 
-        result2 = to_datetime(strings, cache=cache)
+        # Can't parse in consistent format, so need to convert each individually.
+        result2 = DatetimeIndex(
+            [to_datetime(string, cache=cache) for string in strings]
+        )
         assert isinstance(result2, DatetimeIndex)
         tm.assert_numpy_array_equal(result, result2.values)
 
@@ -2011,80 +2018,39 @@ def test_dayfirst(self, cache):
 
     def test_dayfirst_warnings_valid_input(self):
         # GH 12585
-        warning_msg_day_first = (
-            r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) "
-            "was specified. This may lead to inconsistently parsed dates! Specify a "
-            "format to ensure consistent parsing."
+        warning_msg = (
+            "Parsing dates in .* format when dayfirst=.* was specified. "
+            "Pass `dayfirst=.*` or specify a format to silence this warning."
         )
 
         # CASE 1: valid input
         arr = ["31/12/2014", "10/03/2011"]
-        expected_consistent = DatetimeIndex(
+        expected = DatetimeIndex(
             ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None
         )
-        expected_inconsistent = DatetimeIndex(
-            ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None
-        )
 
         # A. dayfirst arg correct, no warning
         res1 = to_datetime(arr, dayfirst=True)
-        tm.assert_index_equal(expected_consistent, res1)
+        tm.assert_index_equal(expected, res1)
 
-        # B. dayfirst arg incorrect, warning + incorrect output
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+        # B. dayfirst arg incorrect, warning
+        with tm.assert_produces_warning(UserWarning, match=warning_msg):
             res2 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected_inconsistent, res2)
-
-        # C. dayfirst default arg, same as B
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res3 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected_inconsistent, res3)
-
-        # D. infer_datetime_format=True overrides dayfirst default
-        # no warning + correct result
-        res4 = to_datetime(arr, infer_datetime_format=True)
-        tm.assert_index_equal(expected_consistent, res4)
+        tm.assert_index_equal(expected, res2)
 
     def test_dayfirst_warnings_invalid_input(self):
         # CASE 2: invalid input
         # cannot consistently process with single format
-        # warnings *always* raised
-        warning_msg_day_first = (
-            r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) "
-            "was specified. This may lead to inconsistently parsed dates! Specify a "
-            "format to ensure consistent parsing."
-        )
-        warning_msg_month_first = (
-            r"Parsing dates in MM/DD/YYYY format when dayfirst=True "
-            "was specified. This may lead to inconsistently parsed dates! Specify a "
-            "format to ensure consistent parsing."
-        )
+        # ValueError *always* raised
 
-        arr = ["31/12/2014", "03/30/2011"]
         # first in DD/MM/YYYY, second in MM/DD/YYYY
-        expected = DatetimeIndex(
-            ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None
-        )
-
-        # A. use dayfirst=True
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
-            res5 = to_datetime(arr, dayfirst=True)
-        tm.assert_index_equal(expected, res5)
-
-        # B. use dayfirst=False
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res6 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected, res6)
-
-        # C. use dayfirst default arg, same as B
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res7 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected, res7)
+        arr = ["31/12/2014", "03/30/2011"]
 
-        # D. use infer_datetime_format=True
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res8 = to_datetime(arr, infer_datetime_format=True)
-        tm.assert_index_equal(expected, res8)
+        with pytest.raises(
+            ValueError,
+            match=r"time data '03/30/2011' does not match format '%d/%m/%Y' \(match\)$",
+        ):
+            to_datetime(arr, dayfirst=True)
 
     @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray])
     def test_to_datetime_dta_tz(self, klass):
@@ -2139,12 +2105,8 @@ def test_to_datetime_infer_datetime_format_consistent_format(
         s_as_dt_strings = ser.apply(lambda x: x.strftime(test_format))
 
         with_format = to_datetime(s_as_dt_strings, format=test_format, cache=cache)
-        no_infer = to_datetime(
-            s_as_dt_strings, infer_datetime_format=False, cache=cache
-        )
-        yes_infer = to_datetime(
-            s_as_dt_strings, infer_datetime_format=True, cache=cache
-        )
+        no_infer = to_datetime(s_as_dt_strings, cache=cache)
+        yes_infer = to_datetime(s_as_dt_strings, cache=cache)
 
         # Whether the format is explicitly passed, it is inferred, or
         # it is not inferred, the results should all be the same
@@ -2223,7 +2185,7 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset):
     def test_infer_datetime_format_zero_tz(self, ts, zero_tz):
         # GH 41047
         ser = Series([ts + zero_tz])
-        result = to_datetime(ser, infer_datetime_format=True)
+        result = to_datetime(ser)
         tz = pytz.utc if zero_tz == "Z" else None
         expected = Series([Timestamp(ts, tz=tz)])
         tm.assert_series_equal(result, expected)
@@ -2782,9 +2744,9 @@ def test_empty_string_datetime_coerce_format():
     with pytest.raises(ValueError, match="does not match format"):
         to_datetime(td, format=format, errors="raise")
 
-    # don't raise an exception in case no format is given
-    result = to_datetime(td, errors="raise")
-    tm.assert_series_equal(result, expected)
+    # still raise an exception in case no format is given
+    with pytest.raises(ValueError, match="does not match format"):
+        to_datetime(td, errors="raise")
 
 
 def test_empty_string_datetime_coerce__unit():
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index a4e12315d34e0..49d83a8fa5c56 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -235,19 +235,19 @@ def test_guess_datetime_format_wrong_type_inputs(invalid_type_dt):
 
 
 @pytest.mark.parametrize(
-    "string,fmt",
+    "string,fmt,dayfirst",
     [
-        ("2011-1-1", "%Y-%m-%d"),
-        ("1/1/2011", "%m/%d/%Y"),
-        ("30-1-2011", "%d-%m-%Y"),
-        ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S"),
-        ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S"),
-        ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S"),
+        ("2011-1-1", "%Y-%m-%d", False),
+        ("1/1/2011", "%m/%d/%Y", False),
+        ("30-1-2011", "%d-%m-%Y", True),
+        ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S", False),
+        ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S", False),
+        ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S", False),
     ],
 )
-def test_guess_datetime_format_no_padding(string, fmt):
+def test_guess_datetime_format_no_padding(string, fmt, dayfirst):
     # see gh-11142
-    result = parsing.guess_datetime_format(string)
+    result = parsing.guess_datetime_format(string, dayfirst=dayfirst)
     assert result == fmt
 
 

From 4a6f19856f674dfdd3b5cc8248548ab121786801 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 09:16:00 +0100
Subject: [PATCH 04/34] :rotating_light: add warning if format can't be guessed

---
 pandas/core/tools/datetimes.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 5760952ba7324..09729c2aab22c 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -132,7 +132,16 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str
     if (first_non_null := tslib.first_non_null(arr)) != -1:
         if type(first_non_nan_element := arr[first_non_null]) is str:
             # GH#32264 np.str_ object
-            return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst)
+            guessed_format = guess_datetime_format(
+                first_non_nan_element, dayfirst=dayfirst
+            )
+            if guessed_format is not None:
+                return guessed_format
+            warnings.warn(
+                "Could not infer format - "
+                "to ensure consistent parsing, specify a format.",
+                stacklevel=find_stack_level(),
+            )
     return None
 
 

From 5568dca44d1f7a5267dcbfdcf843a5222f088258 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 09:22:00 +0100
Subject: [PATCH 05/34] :goal_net: catch warnings

---
 pandas/core/tools/datetimes.py                |   2 +-
 pandas/tests/frame/methods/test_to_csv.py     |   5 +-
 pandas/tests/groupby/test_function.py         |   3 +-
 .../tests/groupby/transform/test_transform.py |   3 +-
 pandas/tests/io/excel/test_readers.py         |  13 +-
 pandas/tests/io/parser/test_parse_dates.py    |  98 ++++-
 .../io/parser/usecols/test_parse_dates.py     |   8 +-
 pandas/tests/test_algos.py                    |   3 +-
 pandas/tests/tools/test_to_datetime.py        | 391 +++++++++---------
 9 files changed, 309 insertions(+), 217 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 09729c2aab22c..41feb153978d4 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -1002,7 +1002,7 @@ def to_datetime(
       are constant:
 
     >>> from datetime import datetime
-    >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
+    >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)])
     DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
                   dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
 
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 3b4dec8bff7f1..3985bd40daac5 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -514,7 +514,10 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame):
             tsframe.index = MultiIndex.from_arrays(new_index)
 
             tsframe.to_csv(path, index_label=["time", "foo"])
-            recons = self.read_csv(path, index_col=[0, 1])
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                recons = self.read_csv(path, index_col=[0, 1], parse_dates=True)
 
             # TODO to_csv drops column name
             tm.assert_frame_equal(tsframe, recons, check_names=False)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index cdbb121819c5e..ed63d41a74ae6 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -717,7 +717,8 @@ def test_max_nan_bug():
 -05-06,2013-05-06 00:00:00,,log.log
 -05-07,2013-05-07 00:00:00,OE,xlsx"""
 
-    df = pd.read_csv(StringIO(raw), parse_dates=[0])
+    with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+        df = pd.read_csv(StringIO(raw), parse_dates=[0])
     gb = df.groupby("Date")
     r = gb[["File"]].max()
     e = gb["File"].max().to_frame()
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 8a2bd64a3deb0..d52de4d0658ef 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1070,7 +1070,8 @@ def demean_rename(x):
 @pytest.mark.parametrize("func", [min, max, np.min, np.max, "first", "last"])
 def test_groupby_transform_timezone_column(func):
     # GH 24198
-    ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
+    with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+        ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
     result = DataFrame({"end_time": [ts], "id": [1]})
     result["max_end_time"] = result.groupby("id").end_time.transform(func)
     expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"])
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index fa1d6bbfd5a7e..8f937ad6b401a 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -888,11 +888,18 @@ def test_reader_seconds(self, request, engine, read_ext):
                 ]
             }
         )
-
-        actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
+        if engine == "odf":
+            # odf recognises cell type as time (from its attribute)
+            # so tries to parse it.
+            warning = UserWarning
+        else:
+            warning = None
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
-        actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
     def test_read_excel_multiindex(self, request, read_ext):
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index b8d515a67b7fe..c3feb03936686 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -826,7 +826,13 @@ def test_yy_format_with_year_first(all_parsers, parse_dates):
 090331,0830,5,6
 """
     parser = all_parsers
-    result = parser.read_csv(StringIO(data), index_col=0, parse_dates=parse_dates)
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        index_col=0,
+        parse_dates=parse_dates,
+    )
     index = DatetimeIndex(
         [
             datetime(2009, 1, 31, 0, 10, 0),
@@ -899,7 +905,13 @@ def test_multi_index_parse_dates(all_parsers, index_col):
         columns=["A", "B", "C"],
         index=index,
     )
-    result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True)
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        index_col=index_col,
+        parse_dates=True,
+    )
     tm.assert_frame_equal(result, expected)
 
 
@@ -1232,19 +1244,55 @@ def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates):
 
 
 @pytest.mark.parametrize("cache_dates", [True, False])
-@pytest.mark.parametrize("value", ["nan", "0", ""])
+@pytest.mark.parametrize("value", ["nan", ""])
 def test_bad_date_parse(all_parsers, cache_dates, value):
     # if we have an invalid date make sure that we handle this with
     # and w/o the cache properly
     parser = all_parsers
     s = StringIO((f"{value},\n") * 50000)
 
-    parser.read_csv(
+    if parser.engine == "pyarrow":
+        # None in input gets converted to 'None', for which
+        # pandas tries to guess the datetime format, triggering
+        # the warning. TODO: parse dates directly in pyarrow, see
+        # https://github.com/pandas-dev/pandas/issues/48017
+        warn = UserWarning
+    else:
+        warn = None
+    parser.read_csv_check_warnings(
+        warn,
+        "Could not infer format",
+        s,
+        header=None,
+        names=["foo", "bar"],
+        parse_dates=["foo"],
+        cache_dates=cache_dates,
+    )
+
+
+@pytest.mark.parametrize("cache_dates", [True, False])
+@pytest.mark.parametrize("value", ["0"])
+def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
+    # if we have an invalid date make sure that we handle this with
+    # and w/o the cache properly.
+    parser = all_parsers
+    s = StringIO((f"{value},\n") * 50000)
+
+    if parser.engine == "pyarrow":
+        # pyarrow reads "0" as 0 (of type int64), and so
+        # pandas doesn't try to guess the datetime format
+        # TODO: parse dates directly in pyarrow, see
+        # https://github.com/pandas-dev/pandas/issues/48017
+        warn = None
+    else:
+        warn = UserWarning
+    parser.read_csv_check_warnings(
+        warn,
+        "Could not infer format",
         s,
         header=None,
         names=["foo", "bar"],
         parse_dates=["foo"],
-        infer_datetime_format=False,
         cache_dates=cache_dates,
     )
 
@@ -1262,6 +1310,19 @@ def test_parse_dates_empty_string(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+def test_parse_dates_infer_datetime_format_warning(all_parsers):
+    # GH 49024
+    parser = all_parsers
+    data = "Date,test\n2012-01-01,1\n,2"
+    parser.read_csv_check_warnings(
+        UserWarning,
+        "The argument 'infer_datetime_format' is deprecated",
+        StringIO(data),
+        parse_dates=["Date"],
+        infer_datetime_format=True,
+    )
+
+
 @xfail_pyarrow
 @pytest.mark.parametrize(
     "data,kwargs,expected",
@@ -1635,7 +1696,13 @@ def test_parse_timezone(all_parsers):
 def test_invalid_parse_delimited_date(all_parsers, date_string):
     parser = all_parsers
     expected = DataFrame({0: [date_string]}, dtype="object")
-    result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0])
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(date_string),
+        header=None,
+        parse_dates=[0],
+    )
     tm.assert_frame_equal(result, expected)
 
 
@@ -1786,7 +1853,13 @@ def test_date_parser_and_names(all_parsers):
     # GH#33699
     parser = all_parsers
     data = StringIO("""x,y\n1,2""")
-    result = parser.read_csv(data, parse_dates=["B"], names=["B"])
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        data,
+        parse_dates=["B"],
+        names=["B"],
+    )
     expected = DataFrame({"B": ["y", "2"]}, index=["x", "1"])
     tm.assert_frame_equal(result, expected)
 
@@ -1833,7 +1906,9 @@ def test_date_parser_usecols_thousands(all_parsers):
     """
 
     parser = all_parsers
-    result = parser.read_csv(
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
         StringIO(data),
         parse_dates=[1],
         usecols=[1, 2],
@@ -1947,7 +2022,12 @@ def test_infer_first_column_as_index(all_parsers):
     # GH#11019
     parser = all_parsers
     data = "a,b,c\n1970-01-01,2,3,4"
-    result = parser.read_csv(StringIO(data), parse_dates=["a"])
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        parse_dates=["a"],
+    )
     expected = DataFrame({"a": "2", "b": 3, "c": 4}, index=["1970-01-01"])
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py
index 6d40435a4107e..4823df1da9959 100644
--- a/pandas/tests/io/parser/usecols/test_parse_dates.py
+++ b/pandas/tests/io/parser/usecols/test_parse_dates.py
@@ -124,7 +124,13 @@ def test_usecols_with_parse_dates4(all_parsers):
     }
     expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))
 
-    result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        usecols=usecols,
+        parse_dates=parse_dates,
+    )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 80271c13cd35d..b3f0f40be2d78 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1212,7 +1212,8 @@ def test_value_counts_datetime_outofbounds(self):
         tm.assert_series_equal(res, exp)
 
         # GH 12424
-        res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
         exp = Series(["2362-01-01", np.nan], dtype=object)
         tm.assert_series_equal(res, exp)
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 286036440073f..a2871e79dc7d9 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -228,6 +228,13 @@ def test_to_datetime_with_NA(self, data, format, expected):
         result = to_datetime(data, format=format)
         tm.assert_index_equal(result, expected)
 
+    def test_to_datetime_with_NA_with_warning(self):
+        # GH#42957
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result = to_datetime(["201010", pd.NA])
+        expected = DatetimeIndex(["2010-10-20", "NaT"])
+        tm.assert_index_equal(result, expected)
+
     def test_to_datetime_format_integer(self, cache):
         # GH 10178
         ser = Series([2000, 2001, 2002])
@@ -345,7 +352,6 @@ def test_to_datetime_with_non_exact(self, cache):
         ],
     )
     def test_parse_nanoseconds_with_formula(self, cache, arg):
-
         # GH8989
         # truncating the nanoseconds when a format was provided
         expected = to_datetime(arg, cache=cache)
@@ -619,15 +625,16 @@ def test_to_datetime_YYYYMMDD(self):
     def test_to_datetime_unparsable_ignore(self):
         # unparsable
         ser = "Month 1, 1999"
-        assert to_datetime(ser, errors="ignore") == ser
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            assert to_datetime(ser, errors="ignore") == ser
 
     @td.skip_if_windows  # `tm.set_timezone` does not work in windows
     def test_to_datetime_now(self):
         # See GH#18666
         with tm.set_timezone("US/Eastern"):
-            msg = "The parsing of 'now' in pd.to_datetime"
+            msg = "The parsing of 'now' in pd.to_datetime|Could not infer format"
             with tm.assert_produces_warning(
-                FutureWarning, match=msg, check_stacklevel=False
+                (FutureWarning, UserWarning), match=msg, check_stacklevel=False
             ):
                 # checking stacklevel is tricky because we go through cython code
                 # GH#18705
@@ -654,8 +661,11 @@ def test_to_datetime_today(self, tz):
         # so this test will not detect the regression introduced in #18666.
         with tm.set_timezone(tz):
             nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
-            pdtoday = to_datetime("today")
-            pdtoday2 = to_datetime(["today"])[0]
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                pdtoday = to_datetime("today")
+                pdtoday2 = to_datetime(["today"])[0]
 
             tstoday = Timestamp("today")
             tstoday2 = Timestamp.today()
@@ -672,8 +682,8 @@ def test_to_datetime_today(self, tz):
 
     @pytest.mark.parametrize("arg", ["now", "today"])
     def test_to_datetime_today_now_unicode_bytes(self, arg):
-        warn = FutureWarning if arg == "now" else None
-        msg = "The parsing of 'now' in pd.to_datetime"
+        warn = (FutureWarning, UserWarning) if arg == "now" else UserWarning
+        msg = "The parsing of 'now' in pd.to_datetime|Could not infer format"
         with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
             # checking stacklevel is tricky because we go through cython code
             # GH#18705
@@ -946,18 +956,17 @@ def test_datetime_invalid_datatype(self, arg):
             to_datetime(arg)
 
     @pytest.mark.parametrize("value", ["a", "00:01:99"])
-    @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_invalid_scalar(self, value, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_invalid_scalar(self, value, format, warning):
         # GH24763
-        res = to_datetime(
-            value, errors="ignore", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="ignore", format=format)
         assert res == value
 
-        res = to_datetime(
-            value, errors="coerce", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="coerce", format=format)
         assert res is NaT
 
         msg = (
@@ -966,51 +975,46 @@ def test_datetime_invalid_scalar(self, value, format, infer):
             f"Given date string {value} not likely a datetime"
         )
         with pytest.raises(ValueError, match=msg):
-            to_datetime(
-                value, errors="raise", format=format, infer_datetime_format=infer
-            )
+            with tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime(value, errors="raise", format=format)
 
     @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"])
-    @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_outofbounds_scalar(self, value, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_outofbounds_scalar(self, value, format, warning):
         # GH24763
-        res = to_datetime(
-            value, errors="ignore", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="ignore", format=format)
         assert res == value
 
-        res = to_datetime(
-            value, errors="coerce", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="coerce", format=format)
         assert res is NaT
 
         if format is not None:
             msg = "is a bad directive in format|Out of bounds .* present at position 0"
             with pytest.raises(ValueError, match=msg):
-                to_datetime(
-                    value, errors="raise", format=format, infer_datetime_format=infer
-                )
+                to_datetime(value, errors="raise", format=format)
         else:
             msg = "Out of bounds .* present at position 0"
-            with pytest.raises(OutOfBoundsDatetime, match=msg):
-                to_datetime(
-                    value, errors="raise", format=format, infer_datetime_format=infer
-                )
+            with pytest.raises(
+                OutOfBoundsDatetime, match=msg
+            ), tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime(value, errors="raise", format=format)
 
     @pytest.mark.parametrize("values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]])
-    @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_invalid_index(self, values, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_invalid_index(self, values, format, warning):
         # GH24763
-        res = to_datetime(
-            values, errors="ignore", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(values, errors="ignore", format=format)
         tm.assert_index_equal(res, Index(values))
 
-        res = to_datetime(
-            values, errors="coerce", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(values, errors="coerce", format=format)
         tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values)))
 
         msg = (
@@ -1019,9 +1023,8 @@ def test_datetime_invalid_index(self, values, format, infer):
             "second must be in 0..59"
         )
         with pytest.raises(ValueError, match=msg):
-            to_datetime(
-                values, errors="raise", format=format, infer_datetime_format=infer
-            )
+            with tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime(values, errors="raise", format=format)
 
     @pytest.mark.parametrize("utc", [True, None])
     @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
@@ -1161,28 +1164,28 @@ def test_to_datetime_coerce(self):
             ("ignore", Index(["200622-12-31", "111111-24-11"])),
         ],
     )
-    def test_to_datetime_malformed_no_raise(
-        self, errors, expected, infer_datetime_format
-    ):
+    def test_to_datetime_malformed_no_raise(self, errors, expected):
         # GH 28299
         # GH 48633
         ts_strings = ["200622-12-31", "111111-24-11"]
-        result = to_datetime(
-            ts_strings, errors=errors, infer_datetime_format=infer_datetime_format
-        )
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result = to_datetime(ts_strings, errors=errors)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize("infer_datetime_format", [True, False])
-    def test_to_datetime_malformed_raise(self, infer_datetime_format):
+    def test_to_datetime_malformed_raise(self):
         # GH 48633
         ts_strings = ["200622-12-31", "111111-24-11"]
         with pytest.raises(
             ValueError,
             match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$",
         ):
-            to_datetime(
-                ts_strings, errors="raise", infer_datetime_format=infer_datetime_format
-            )
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(
+                    ts_strings,
+                    errors="raise",
+                )
 
     def test_iso_8601_strings_with_same_offset(self):
         # GH 17697, 11736
@@ -1283,7 +1286,10 @@ def test_mixed_offsets_with_native_datetime_raises(self):
         tm.assert_series_equal(mixed, expected)
 
         with pytest.raises(ValueError, match="Tz-aware datetime.datetime"):
-            to_datetime(mixed)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(mixed)
 
     def test_non_iso_strings_with_tz_offset(self):
         result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2)
@@ -1409,23 +1415,26 @@ def test_unit_with_numeric(self, cache, errors, dtype):
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
-        "exp, arr",
+        "exp, arr, warning",
         [
             [
                 ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"],
                 ["foo", 1.434692e18, 1.432766e18],
+                UserWarning,
             ],
             [
                 ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"],
                 [1.434692e18, 1.432766e18, "foo", "NaT"],
+                None,
             ],
         ],
     )
-    def test_unit_with_numeric_coerce(self, cache, exp, arr):
+    def test_unit_with_numeric_coerce(self, cache, exp, arr, warning):
         # but we want to make sure that we are coercing
         # if we have ints/strings
         expected = DatetimeIndex(exp)
-        result = to_datetime(arr, errors="coerce", cache=cache)
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result = to_datetime(arr, errors="coerce", cache=cache)
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -1741,7 +1750,10 @@ def test_to_datetime_barely_out_of_bounds(self):
 
         msg = "Out of bounds .* present at position 0"
         with pytest.raises(OutOfBoundsDatetime, match=msg):
-            to_datetime(arr)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(arr)
 
     @pytest.mark.parametrize(
         "arg, exp_str",
@@ -1925,15 +1937,22 @@ def test_string_na_nat_conversion_malformed(self, cache):
         # GH 10636, default is now 'raise'
         msg = r"Unknown string format:|day is out of range for month"
         with pytest.raises(ValueError, match=msg):
-            to_datetime(malformed, errors="raise", cache=cache)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(malformed, errors="raise", cache=cache)
 
-        result = to_datetime(malformed, errors="ignore", cache=cache)
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result = to_datetime(malformed, errors="ignore", cache=cache)
         # GH 21864
         expected = Index(malformed)
         tm.assert_index_equal(result, expected)
 
         with pytest.raises(ValueError, match=msg):
-            to_datetime(malformed, errors="raise", cache=cache)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(malformed, errors="raise", cache=cache)
 
     def test_string_na_nat_conversion_with_name(self, cache):
         idx = ["a", "b", "c", "d", "e"]
@@ -2114,60 +2133,14 @@ def test_to_datetime_infer_datetime_format_consistent_format(
         tm.assert_series_equal(no_infer, yes_infer)
 
     @pytest.mark.parametrize(
-        "data",
-        [
-            ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"],
-            ["Jan/01/2011", "Feb/01/2011", "Mar/01/2011"],
-        ],
+        "tz_name, offset, warning",
+        [("UTC", 0, None), ("UTC-3", 180, UserWarning), ("UTC+3", -180, UserWarning)],
     )
-    def test_to_datetime_infer_datetime_format_inconsistent_format(self, cache, data):
-        ser = Series(np.array(data))
-
-        # When the format is inconsistent, infer_datetime_format should just
-        # fallback to the default parsing
-        tm.assert_series_equal(
-            to_datetime(ser, infer_datetime_format=False, cache=cache),
-            to_datetime(ser, infer_datetime_format=True, cache=cache),
-        )
-
-    def test_to_datetime_infer_datetime_format_series_with_nans(self, cache):
-        ser = Series(
-            np.array(
-                ["01/01/2011 00:00:00", np.nan, "01/03/2011 00:00:00", np.nan],
-                dtype=object,
-            )
-        )
-        tm.assert_series_equal(
-            to_datetime(ser, infer_datetime_format=False, cache=cache),
-            to_datetime(ser, infer_datetime_format=True, cache=cache),
-        )
-
-    def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache):
-        ser = Series(
-            np.array(
-                [
-                    np.nan,
-                    np.nan,
-                    "01/01/2011 00:00:00",
-                    "01/02/2011 00:00:00",
-                    "01/03/2011 00:00:00",
-                ],
-                dtype=object,
-            )
-        )
-
-        tm.assert_series_equal(
-            to_datetime(ser, infer_datetime_format=False, cache=cache),
-            to_datetime(ser, infer_datetime_format=True, cache=cache),
-        )
-
-    @pytest.mark.parametrize(
-        "tz_name, offset", [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)]
-    )
-    def test_infer_datetime_format_tz_name(self, tz_name, offset):
+    def test_infer_datetime_format_tz_name(self, tz_name, offset, warning):
         # GH 33133
         ser = Series([f"2019-02-02 08:07:13 {tz_name}"])
-        result = to_datetime(ser, infer_datetime_format=True)
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result = to_datetime(ser)
         expected = Series(
             [Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))]
         )
@@ -2203,26 +2176,38 @@ def test_to_datetime_iso8601_noleading_0s(self, cache, format):
         )
         tm.assert_series_equal(to_datetime(ser, format=format, cache=cache), expected)
 
+    def test_parse_dates_infer_datetime_format_warning(self):
+        # GH 49024
+        with tm.assert_produces_warning(
+            UserWarning,
+            match="The argument 'infer_datetime_format' is deprecated",
+        ):
+            to_datetime(["10-10-2000"], infer_datetime_format=True)
+
 
 class TestDaysInMonth:
     # tests for issue #10154
 
     @pytest.mark.parametrize(
-        "arg, format",
+        "arg, format, warning",
         [
-            ["2015-02-29", None],
-            ["2015-02-29", "%Y-%m-%d"],
-            ["2015-02-32", "%Y-%m-%d"],
-            ["2015-04-31", "%Y-%m-%d"],
+            ["2015-02-29", None, UserWarning],
+            ["2015-02-29", "%Y-%m-%d", None],
+            ["2015-02-32", "%Y-%m-%d", None],
+            ["2015-04-31", "%Y-%m-%d", None],
         ],
     )
-    def test_day_not_in_month_coerce(self, cache, arg, format):
-        assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
+    def test_day_not_in_month_coerce(self, cache, arg, format, warning):
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
 
     def test_day_not_in_month_raise(self, cache):
         msg = "day is out of range for month"
         with pytest.raises(ValueError, match=msg):
-            to_datetime("2015-02-29", errors="raise", cache=cache)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime("2015-02-29", errors="raise", cache=cache)
 
     @pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"])
     def test_day_not_in_month_raise_value(self, cache, arg):
@@ -2231,85 +2216,85 @@ def test_day_not_in_month_raise_value(self, cache, arg):
             to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache)
 
     @pytest.mark.parametrize(
-        "expected, format",
+        "expected, format, warning",
         [
-            ["2015-02-29", None],
-            ["2015-02-29", "%Y-%m-%d"],
-            ["2015-02-29", "%Y-%m-%d"],
-            ["2015-04-31", "%Y-%m-%d"],
+            ["2015-02-29", None, UserWarning],
+            ["2015-02-29", "%Y-%m-%d", None],
+            ["2015-02-29", "%Y-%m-%d", None],
+            ["2015-04-31", "%Y-%m-%d", None],
         ],
     )
-    def test_day_not_in_month_ignore(self, cache, expected, format):
-        result = to_datetime(expected, errors="ignore", format=format, cache=cache)
+    def test_day_not_in_month_ignore(self, cache, expected, format, warning):
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result = to_datetime(expected, errors="ignore", format=format, cache=cache)
         assert result == expected
 
 
 class TestDatetimeParsingWrappers:
     @pytest.mark.parametrize(
-        "date_str,expected",
-        list(
-            {
-                "2011-01-01": datetime(2011, 1, 1),
-                "2Q2005": datetime(2005, 4, 1),
-                "2Q05": datetime(2005, 4, 1),
-                "2005Q1": datetime(2005, 1, 1),
-                "05Q1": datetime(2005, 1, 1),
-                "2011Q3": datetime(2011, 7, 1),
-                "11Q3": datetime(2011, 7, 1),
-                "3Q2011": datetime(2011, 7, 1),
-                "3Q11": datetime(2011, 7, 1),
-                # quarterly without space
-                "2000Q4": datetime(2000, 10, 1),
-                "00Q4": datetime(2000, 10, 1),
-                "4Q2000": datetime(2000, 10, 1),
-                "4Q00": datetime(2000, 10, 1),
-                "2000q4": datetime(2000, 10, 1),
-                "2000-Q4": datetime(2000, 10, 1),
-                "00-Q4": datetime(2000, 10, 1),
-                "4Q-2000": datetime(2000, 10, 1),
-                "4Q-00": datetime(2000, 10, 1),
-                "00q4": datetime(2000, 10, 1),
-                "2005": datetime(2005, 1, 1),
-                "2005-11": datetime(2005, 11, 1),
-                "2005 11": datetime(2005, 11, 1),
-                "11-2005": datetime(2005, 11, 1),
-                "11 2005": datetime(2005, 11, 1),
-                "200511": datetime(2020, 5, 11),
-                "20051109": datetime(2005, 11, 9),
-                "20051109 10:15": datetime(2005, 11, 9, 10, 15),
-                "20051109 08H": datetime(2005, 11, 9, 8, 0),
-                "2005-11-09 10:15": datetime(2005, 11, 9, 10, 15),
-                "2005-11-09 08H": datetime(2005, 11, 9, 8, 0),
-                "2005/11/09 10:15": datetime(2005, 11, 9, 10, 15),
-                "2005/11/09 08H": datetime(2005, 11, 9, 8, 0),
-                "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, 36, 28),
-                "Thu Sep 25 2003": datetime(2003, 9, 25),
-                "Sep 25 2003": datetime(2003, 9, 25),
-                "January 1 2014": datetime(2014, 1, 1),
-                # GHE10537
-                "2014-06": datetime(2014, 6, 1),
-                "06-2014": datetime(2014, 6, 1),
-                "2014-6": datetime(2014, 6, 1),
-                "6-2014": datetime(2014, 6, 1),
-                "20010101 12": datetime(2001, 1, 1, 12),
-                "20010101 1234": datetime(2001, 1, 1, 12, 34),
-                "20010101 123456": datetime(2001, 1, 1, 12, 34, 56),
-            }.items()
-        ),
+        "date_str, expected, warning",
+        [
+            ("2011-01-01", datetime(2011, 1, 1), None),
+            ("2Q2005", datetime(2005, 4, 1), UserWarning),
+            ("2Q05", datetime(2005, 4, 1), UserWarning),
+            ("2005Q1", datetime(2005, 1, 1), UserWarning),
+            ("05Q1", datetime(2005, 1, 1), UserWarning),
+            ("2011Q3", datetime(2011, 7, 1), UserWarning),
+            ("11Q3", datetime(2011, 7, 1), UserWarning),
+            ("3Q2011", datetime(2011, 7, 1), UserWarning),
+            ("3Q11", datetime(2011, 7, 1), UserWarning),
+            # quarterly without space
+            ("2000Q4", datetime(2000, 10, 1), UserWarning),
+            ("00Q4", datetime(2000, 10, 1), UserWarning),
+            ("4Q2000", datetime(2000, 10, 1), UserWarning),
+            ("4Q00", datetime(2000, 10, 1), UserWarning),
+            ("2000q4", datetime(2000, 10, 1), UserWarning),
+            ("2000-Q4", datetime(2000, 10, 1), UserWarning),
+            ("00-Q4", datetime(2000, 10, 1), UserWarning),
+            ("4Q-2000", datetime(2000, 10, 1), UserWarning),
+            ("4Q-00", datetime(2000, 10, 1), UserWarning),
+            ("00q4", datetime(2000, 10, 1), UserWarning),
+            ("2005", datetime(2005, 1, 1), None),
+            ("2005-11", datetime(2005, 11, 1), UserWarning),
+            ("2005 11", datetime(2005, 11, 1), UserWarning),
+            ("11-2005", datetime(2005, 11, 1), UserWarning),
+            ("11 2005", datetime(2005, 11, 1), UserWarning),
+            ("200511", datetime(2020, 5, 11), UserWarning),
+            ("20051109", datetime(2005, 11, 9), None),
+            ("20051109 10:15", datetime(2005, 11, 9, 10, 15), None),
+            ("20051109 08H", datetime(2005, 11, 9, 8, 0), None),
+            ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15), None),
+            ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0), None),
+            ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15), None),
+            ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0), None),
+            ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28), None),
+            ("Thu Sep 25 2003", datetime(2003, 9, 25), None),
+            ("Sep 25 2003", datetime(2003, 9, 25), None),
+            ("January 1 2014", datetime(2014, 1, 1), None),
+            # GHE10537
+            ("2014-06", datetime(2014, 6, 1), UserWarning),
+            ("06-2014", datetime(2014, 6, 1), UserWarning),
+            ("2014-6", datetime(2014, 6, 1), UserWarning),
+            ("6-2014", datetime(2014, 6, 1), UserWarning),
+            ("20010101 12", datetime(2001, 1, 1, 12), None),
+            ("20010101 1234", datetime(2001, 1, 1, 12, 34), UserWarning),
+            ("20010101 123456", datetime(2001, 1, 1, 12, 34, 56), UserWarning),
+        ],
     )
-    def test_parsers(self, date_str, expected, cache):
+    def test_parsers(self, date_str, expected, warning, cache):
 
         # dateutil >= 2.5.0 defaults to yearfirst=True
         # https://github.com/dateutil/dateutil/issues/217
         yearfirst = True
 
         result1, _ = parsing.parse_time_string(date_str, yearfirst=yearfirst)
-        result2 = to_datetime(date_str, yearfirst=yearfirst)
-        result3 = to_datetime([date_str], yearfirst=yearfirst)
-        # result5 is used below
-        result4 = to_datetime(
-            np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result2 = to_datetime(date_str, yearfirst=yearfirst)
+            result3 = to_datetime([date_str], yearfirst=yearfirst)
+            # result5 is used below
+            result4 = to_datetime(
+                np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache
+            )
         result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
         # result7 is used below
         result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
@@ -2418,9 +2403,10 @@ def test_parsers_dayfirst_yearfirst(
             result2 = Timestamp(date_str)
             assert result2 == expected
 
-        result3 = to_datetime(
-            date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache
-        )
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result3 = to_datetime(
+                date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache
+            )
 
         result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0]
 
@@ -2437,8 +2423,9 @@ def test_parsers_timestring(self, date_str, exp_def):
         exp_now = parse(date_str)
 
         result1, _ = parsing.parse_time_string(date_str)
-        result2 = to_datetime(date_str)
-        result3 = to_datetime([date_str])
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result2 = to_datetime(date_str)
+            result3 = to_datetime([date_str])
         result4 = Timestamp(date_str)
         result5 = DatetimeIndex([date_str])[0]
         # parse time string return time string based on default date
@@ -2602,17 +2589,23 @@ def test_incorrect_value_exception(self):
         with pytest.raises(
             ValueError, match="Unknown string format: yesterday present at position 1"
         ):
-            to_datetime(["today", "yesterday"])
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(["today", "yesterday"])
 
-    @pytest.mark.parametrize("format", [None, "%Y-%m-%d %H:%M:%S"])
-    def test_to_datetime_out_of_bounds_with_format_arg(self, format):
+    @pytest.mark.parametrize(
+        "format, warning", [(None, UserWarning), ("%Y-%m-%d %H:%M:%S", None)]
+    )
+    def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
         # see gh-23830
         msg = (
             "Out of bounds nanosecond timestamp: 2417-10-27 00:00:00 "
             "present at position 0"
         )
         with pytest.raises(OutOfBoundsDatetime, match=msg):
-            to_datetime("2417-10-27 00:00:00", format=format)
+            with tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime("2417-10-27 00:00:00", format=format)
 
     @pytest.mark.parametrize(
         "arg, origin, expected_str",

From bc910b06eb71e4adc103ca7ee2fb952cd68175d0 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 09:35:49 +0100
Subject: [PATCH 06/34] :memo: update docs

---
 doc/source/user_guide/basics.rst     |  2 ++
 doc/source/user_guide/io.rst         | 31 +++++---------------------
 doc/source/user_guide/timeseries.rst | 27 +++++++----------------
 doc/source/whatsnew/v2.0.0.rst       | 33 ++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
index a34d4891b9d77..92fae28d3bdb3 100644
--- a/doc/source/user_guide/basics.rst
+++ b/doc/source/user_guide/basics.rst
@@ -2313,6 +2313,7 @@ useful if you are reading in data which is mostly of the desired dtype (e.g. num
 non-conforming elements intermixed that you want to represent as missing:
 
 .. ipython:: python
+    :okwarning:
 
     import datetime
 
@@ -2329,6 +2330,7 @@ The ``errors`` parameter has a third option of ``errors='ignore'``, which will s
 encounters any errors with the conversion to a desired data type:
 
 .. ipython:: python
+    :okwarning:
 
     import datetime
 
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index cc01270181202..d6e67cd638a7b 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1009,41 +1009,22 @@ To parse the mixed-timezone values as a datetime column, pass a partially-applie
 Inferring datetime format
 +++++++++++++++++++++++++
 
-If you have ``parse_dates`` enabled for some or all of your columns, and your
-datetime strings are all formatted the same way, you may get a large speed
-up by setting ``infer_datetime_format=True``.  If set, pandas will attempt
-to guess the format of your datetime strings, and then use a faster means
-of parsing the strings.  5-10x parsing speeds have been observed.  pandas
-will fallback to the usual parsing if either the format cannot be guessed
-or the format that was guessed cannot properly parse the entire column
-of strings.  So in general, ``infer_datetime_format`` should not have any
-negative consequences if enabled.
-
-Here are some examples of datetime strings that can be guessed (All
-representing December 30th, 2011 at 00:00:00):
-
-* "20111230"
-* "2011/12/30"
-* "20111230 00:00:00"
-* "12/30/2011 00:00:00"
-* "30/Dec/2011 00:00:00"
-* "30/December/2011 00:00:00"
-
-Note that ``infer_datetime_format`` is sensitive to ``dayfirst``.  With
-``dayfirst=True``, it will guess "01/12/2011" to be December 1st. With
-``dayfirst=False`` (default) it will guess "01/12/2011" to be January 12th.
+If you try to parse a column of date strings, pandas will attempt to guess the format
+from the first non-NaN element, and will then parse the rest of the column with that
+format.
 
 .. ipython:: python
 
-   # Try to infer the format for the index column
    df = pd.read_csv(
        "foo.csv",
        index_col=0,
        parse_dates=True,
-       infer_datetime_format=True,
    )
    df
 
+In the case that you have mixed datetime formats within the same column, you'll need to
+first read in the file, and then apply :func:`to_datetime` to each element.
+
 .. ipython:: python
    :suppress:
 
diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
index 474068e43a4d4..2710a22ec6161 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -13,17 +13,6 @@ a tremendous amount of new functionality for manipulating time series data.
 
 For example, pandas supports:
 
-Parsing time series information from various sources and formats
-
-.. ipython:: python
-
-   import datetime
-
-   dti = pd.to_datetime(
-       ["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)]
-   )
-   dti
-
 Generate sequences of fixed-frequency dates and time spans
 
 .. ipython:: python
@@ -132,6 +121,8 @@ time.
 
 .. ipython:: python
 
+   import datetime
+
    pd.Timestamp(datetime.datetime(2012, 5, 1))
    pd.Timestamp("2012-05-01")
    pd.Timestamp(2012, 5, 1)
@@ -196,26 +187,24 @@ is converted to a ``DatetimeIndex``:
 
 .. ipython:: python
 
-    pd.to_datetime(pd.Series(["Jul 31, 2009", "2010-01-10", None]))
+    pd.to_datetime(pd.Series(["Jul 31, 2009", "Jan 10, 2010", None]))
 
-    pd.to_datetime(["2005/11/23", "2010.12.31"])
+    pd.to_datetime(["2005/11/23", "2010/12/31"])
 
 If you use dates which start with the day first (i.e. European style),
 you can pass the ``dayfirst`` flag:
 
 .. ipython:: python
-   :okwarning:
+    :okwarning:
 
     pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)
-
-    pd.to_datetime(["14-01-2012", "01-14-2012"], dayfirst=True)
+    pd.to_datetime(["04-14-2012 10:00"], dayfirst=True)
 
 .. warning::
 
    You see in the above example that ``dayfirst`` isn't strict. If a date
    can't be parsed with the day being first it will be parsed as if
-   ``dayfirst`` were False, and in the case of parsing delimited date strings
-   (e.g. ``31-12-2012``) then a warning will also be raised.
+   ``dayfirst`` were False and a warning will also be raised.
 
 If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``.
 ``Timestamp`` can also accept string input, but it doesn't accept string parsing
@@ -768,7 +757,7 @@ partially matching dates:
    rng2 = pd.date_range("2011-01-01", "2012-01-01", freq="W")
    ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)
 
-   ts2.truncate(before="2011-11", after="2011-12")
+   ts2.truncate(before="2011-11-01", after="2011-12-01")
    ts2["2011-11":"2011-12"]
 
 Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index a0a7aa94bd287..915a91d71e9eb 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -114,6 +114,39 @@ Optional libraries below the lowest tested version may still work, but are not c
 
 See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
 
+Datetimes are now parsed with a consistent format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:func:`to_datetime` now parses dates with a consistent format, which is guessed from the first non-NA value
+(unless ``format`` is specified). Previously, it would've guessed the format for each element individually.
+
+*Old behavior*:
+
+  .. code-block:: ipython
+
+     In [1]: ser = pd.Series(['13-01-2000', '12-01-2000'])
+     In [2]: pd.to_datetime(ser)
+     Out[2]:
+     0   2000-01-13
+     1   2000-12-01
+     dtype: datetime64[ns]
+
+*New behavior*:
+
+  .. ipython:: python
+    :okwarning:
+
+     ser = pd.Series(['13-01-2000', '12-01-2000'])
+     pd.to_datetime(ser)
+
+Note that this affects :func:`read_csv` as well.
+
+If you still need to parse dates with inconsistent formats, you'll need to apply :func:`to_datetime`
+to each element individually, e.g. ::
+
+     ser = pd.Series(['13-01-2000', '12 January 2000'])
+     ser.apply(pd.to_datetime)
+
 .. _whatsnew_200.api_breaking.other:
 
 Other API changes

From 7d03503198bd45acd63a236b4175603055362cf6 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 19 Oct 2022 10:15:01 +0100
Subject: [PATCH 07/34] :memo: add example of reading csv file with mixed
 formats

---
 doc/source/user_guide/io.rst    | 9 ++++++++-
 doc/source/whatsnew/v2.0.0.rst  | 2 +-
 pandas/_libs/tslibs/parsing.pyx | 4 ++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index d6e67cd638a7b..844cd70f4866c 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1023,7 +1023,14 @@ format.
    df
 
 In the case that you have mixed datetime formats within the same column, you'll need to
-first read it in the file, and then apply :func:`to_datetime` to each element.
+first read it in as an object dtype and then apply :func:`to_datetime` to each element.
+
+.. ipython:: python
+
+   data = io.StringIO("date\n12 Jan 2000\n2000-01-13\n")
+   df = pd.read_csv(data)
+   df['date'] = df['date'].apply(pd.to_datetime)
+   df
 
 .. ipython:: python
    :suppress:
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 915a91d71e9eb..95a0a93838216 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -118,7 +118,7 @@ Datetimes are now parsed with a consistent format
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 :func:`to_datetime` now parses dates with a consistent format, which is guessed from the first non-NA value
-(unless ``format`` is specified). Previously, it would've guessed the format for each element individually.
+(unless ``format`` is specified). Previously, it would have guessed the format for each element individually.
 
 *Old behavior*:
 
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 74de3502b73de..c9df9146240da 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1117,13 +1117,13 @@ cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
         if (day_index > month_index) and dayfirst:
             warnings.warn(
                 f"Parsing dates in {format} format when dayfirst=True was specified. "
-                f"Pass `dayfirst=False` or specify a format to silence this warning.",
+                "Pass `dayfirst=False` or specify a format to silence this warning.",
                 stacklevel=find_stack_level(),
             )
         if (day_index < month_index) and not dayfirst:
             warnings.warn(
                 f"Parsing dates in {format} format when dayfirst=False was specified. "
-                f"Pass `dayfirst=True` or specify a format to silence this warning.",
+                "Pass `dayfirst=True` or specify a format to silence this warning.",
                 stacklevel=find_stack_level(),
             )
 

From ac825f5dc33e16873b01110591f89b26d1e8ed8a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 19 Oct 2022 10:24:42 +0100
Subject: [PATCH 08/34] :wastebasket: removed now outdated tests / clean inputs

---
 .../indexes/datetimes/test_constructors.py    | 22 ++--------
 pandas/tests/tools/test_to_datetime.py        | 43 +------------------
 2 files changed, 6 insertions(+), 59 deletions(-)

diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
index c1039728f5b5e..a9491f90e80f0 100644
--- a/pandas/tests/indexes/datetimes/test_constructors.py
+++ b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -1042,27 +1042,13 @@ def test_datetimeindex_constructor_misc(self):
         arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
         idx4 = DatetimeIndex(arr)
 
-        # Can't be parsed consistently, need to parse each element individually
-        arr = [
-            to_datetime(date_string)
-            for date_string in ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
-        ]
-        idx5 = DatetimeIndex(arr)
-
-        # Can't be parsed consistently, need to parse each element individually
-        arr = [
-            to_datetime(date_string)
-            for date_string in ["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]
-        ]
-        idx6 = DatetimeIndex(arr)
-
-        idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
-        idx8 = DatetimeIndex(
+        idx5 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
+        idx6 = DatetimeIndex(
             ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
         )
-        tm.assert_index_equal(idx7, idx8)
+        tm.assert_index_equal(idx5, idx6)
 
-        for other in [idx2, idx3, idx4, idx5, idx6]:
+        for other in [idx2, idx3, idx4]:
             assert (idx1.values == other.values).all()
 
         sdate = datetime(1999, 12, 25)
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index a2871e79dc7d9..e3b9e30e1923c 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1225,40 +1225,6 @@ def test_iso_8601_strings_with_different_offsets_utc(self):
         )
         tm.assert_index_equal(result, expected)
 
-    def test_iso8601_strings_mixed_offsets_with_naive(self):
-        # GH 24992
-        # Can't parse consistently, need to parse each element in loop.
-        result = DatetimeIndex(
-            [
-                to_datetime(string, utc=True)
-                for string in [
-                    "2018-11-28T00:00:00",
-                    "2018-11-28T00:00:00+12:00",
-                    "2018-11-28T00:00:00",
-                    "2018-11-28T00:00:00+06:00",
-                    "2018-11-28T00:00:00",
-                ]
-            ]
-        )
-        expected = to_datetime(
-            [
-                "2018-11-28T00:00:00",
-                "2018-11-27T12:00:00",
-                "2018-11-28T00:00:00",
-                "2018-11-27T18:00:00",
-                "2018-11-28T00:00:00",
-            ],
-            utc=True,
-        )
-        tm.assert_index_equal(result, expected)
-
-    def test_iso8601_strings_mixed_offsets_with_naive_reversed(self):
-        items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"]
-        # Can't parse consistently, need to parse each element in loop.
-        result = [to_datetime(item, utc=True) for item in items]
-        expected = [to_datetime(item, utc=True) for item in list(reversed(items))][::-1]
-        assert result == expected
-
     def test_mixed_offsets_with_native_datetime_raises(self):
         # GH 25978
 
@@ -1910,9 +1876,7 @@ def test_to_datetime_overflow(self):
     def test_string_na_nat_conversion(self, cache):
         # GH #999, #858
 
-        strings = np.array(
-            ["1/1/2000", "1/2/2000", np.nan, "1/4/2000, 12:34:56"], dtype=object
-        )
+        strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object)
 
         expected = np.empty(4, dtype="M8[ns]")
         for i, val in enumerate(strings):
@@ -1924,10 +1888,7 @@ def test_string_na_nat_conversion(self, cache):
         result = tslib.array_to_datetime(strings)[0]
         tm.assert_almost_equal(result, expected)
 
-        # Can't parse in consistent format, so need to convert each individually.
-        result2 = DatetimeIndex(
-            [to_datetime(string, cache=cache) for string in strings]
-        )
+        result2 = to_datetime(strings, cache=cache)
         assert isinstance(result2, DatetimeIndex)
         tm.assert_numpy_array_equal(result, result2.values)
 

From 2ffcef67c67e1d53d9d36334eefb730b61416f84 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <>
Date: Fri, 21 Oct 2022 19:34:15 +0200
Subject: [PATCH 09/34] :memo: clarify whatsnew and user-guide

---
 doc/source/user_guide/io.rst   | 4 +++-
 doc/source/whatsnew/v2.0.0.rst | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 844cd70f4866c..2f35feaeffec7 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1011,7 +1011,9 @@ Inferring datetime format
 
 If you try to parse a column of date strings, pandas will attempt to guess the format
 from the first non-NaN element, and will then parse the rest of the column with that
-format.
+format. If pandas fails to guess the format, then a warning will be raised, and each
+row will have its format guessed individually by ``dateutil.parser.parse``. The safest
+way to parse dates is to explicitly set ``format=``.
 
 .. ipython:: python
 
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 95a0a93838216..71afc73598988 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -117,8 +117,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
 Datetimes are now parsed with a consistent format
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-:func:`to_datetime` now parses dates with a consistent format, which is guessed from the first non-NA value
-(unless ``format`` is specified). Previously, it would have guessed the format for each element individually.
+In the past, :func:`to_datetime` guessed the format for each element independently. This was appropriate for some cases where a column had a mixed date format - however, it would regularly cause problems for columns where users expected a consistent format but the function would switch formats row-wise. As of version 2.0.0, this behavior is consistent column-wise, and the format is determined by the first non-NA value in the column (unless the user specifies a format, in which case that is used).
 
 *Old behavior*:
 

From b3e32ac646b117f15a9f32c4d95f271926ec5f0e Mon Sep 17 00:00:00 2001
From: Marco Gorelli <>
Date: Fri, 28 Oct 2022 10:54:43 +0200
Subject: [PATCH 10/34] :art:

---
 pandas/tests/tools/test_to_datetime.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index c1c6811b82317..1b052f60e0dc5 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -635,7 +635,7 @@ def test_to_datetime_now(self):
             # GH#18705
             now = Timestamp("now")
             with tm.assert_produces_warning(
-                UserWarning, match="Could not infer format",
+                UserWarning, match="Could not infer format"
             ):
                 pdnow = to_datetime("now")
                 pdnow2 = to_datetime(["now"])[0]

From d3adfe5a3aea0cad36b578f65d03c4f559909403 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 29 Oct 2022 13:48:05 +0100
Subject: [PATCH 11/34] guess %Y-%m format

---
 pandas/_libs/tslibs/parsing.pyx     | 5 +++--
 pandas/tests/tslibs/test_parsing.py | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index a0d0fd7bc67b0..a335b0bdefdcc 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1011,10 +1011,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
                 break
 
     # Only consider it a valid guess if we have a year, month and day,
-    # unless it's %Y which is both common and unambiguous.
+    # unless it's %Y or %Y-%m which conform with ISO8601. Note that we don't
+    # make an exception for %Y%m because it's explicitly not considered ISO8601.
     if (
         len({'year', 'month', 'day'} & found_attrs) != 3
-        and format_guess != ['%Y']
+        and format_guess not in (['%Y'], ['%Y', None, '%m'])
     ):
         return None
 
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index 49d83a8fa5c56..972bb2d8126e5 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -148,6 +148,7 @@ def test_parsers_month_freq(date_str, expected):
         ("20111230", "%Y%m%d"),
         ("2011-12-30", "%Y-%m-%d"),
         ("2011", "%Y"),
+        ("2011-01", "%Y-%m"),
         ("30-12-2011", "%d-%m-%Y"),
         ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"),
         ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"),
@@ -215,6 +216,7 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt):
         "this_is_not_a_datetime",
         "51a",
         "13/2019",
+        "202001",  # YYYYMM isn't ISO8601
     ],
 )
 def test_guess_datetime_format_invalid_inputs(invalid_dt):

From affa7f32aa6cf26b05df1cee7a54591015f708aa Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 29 Oct 2022 13:37:16 +0100
Subject: [PATCH 12/34] Detect format from first non-na, but also exclude now
 and today

---
 pandas/_libs/tslib.pyx                 |  2 +-
 pandas/core/tools/datetimes.py         |  3 ---
 pandas/tests/tools/test_to_datetime.py | 25 +++++++++----------------
 3 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index d7c0c91332e02..705b5440b74a0 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -429,7 +429,7 @@ def first_non_null(values: ndarray) -> int:
         val = values[i]
         if checknull_with_nat_and_na(val):
             continue
-        if isinstance(val, str) and (len(val) == 0 or val in nat_strings):
+        if isinstance(val, str) and (len(val) == 0 or val in ("now", "today", *nat_strings)):
             continue
         return i
     else:
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 27ca210fb0ece..1b7f2c2236a06 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -485,9 +485,6 @@ def _array_strptime_with_fallback(
         else:
             result = arg
     except ValueError:
-        # if fmt was inferred, try falling back
-        # to array_to_datetime - terminate here
-        # for specified formats
         if errors == "raise":
             raise
         elif errors == "coerce":
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 1b052f60e0dc5..90a13f6bc46c3 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -634,11 +634,8 @@ def test_to_datetime_now(self):
         with tm.set_timezone("US/Eastern"):
             # GH#18705
             now = Timestamp("now")
-            with tm.assert_produces_warning(
-                UserWarning, match="Could not infer format"
-            ):
-                pdnow = to_datetime("now")
-                pdnow2 = to_datetime(["now"])[0]
+            pdnow = to_datetime("now")
+            pdnow2 = to_datetime(["now"])[0]
 
             # These should all be equal with infinite perf; this gives
             # a generous margin of 10 seconds
@@ -659,11 +656,8 @@ def test_to_datetime_today(self, tz):
         # so this test will not detect the regression introduced in #18666.
         with tm.set_timezone(tz):
             nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
-            with tm.assert_produces_warning(
-                UserWarning, match="Could not infer format"
-            ):
-                pdtoday = to_datetime("today")
-                pdtoday2 = to_datetime(["today"])[0]
+            pdtoday = to_datetime("today")
+            pdtoday2 = to_datetime(["today"])[0]
 
             tstoday = Timestamp("today")
             tstoday2 = Timestamp.today()
@@ -680,8 +674,7 @@ def test_to_datetime_today(self, tz):
 
     @pytest.mark.parametrize("arg", ["now", "today"])
     def test_to_datetime_today_now_unicode_bytes(self, arg):
-        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
-            to_datetime([arg])
+        to_datetime([arg])
 
     @pytest.mark.parametrize(
         "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")]
@@ -2210,8 +2203,8 @@ class TestDatetimeParsingWrappers:
             ("4Q-00", datetime(2000, 10, 1), UserWarning),
             ("00q4", datetime(2000, 10, 1), UserWarning),
             ("2005", datetime(2005, 1, 1), None),
-            ("2005-11", datetime(2005, 11, 1), UserWarning),
-            ("2005 11", datetime(2005, 11, 1), UserWarning),
+            ("2005-11", datetime(2005, 11, 1), None),
+            ("2005 11", datetime(2005, 11, 1), None),
             ("11-2005", datetime(2005, 11, 1), UserWarning),
             ("11 2005", datetime(2005, 11, 1), UserWarning),
             ("200511", datetime(2020, 5, 11), UserWarning),
@@ -2227,9 +2220,9 @@ class TestDatetimeParsingWrappers:
             ("Sep 25 2003", datetime(2003, 9, 25), None),
             ("January 1 2014", datetime(2014, 1, 1), None),
             # GHE10537
-            ("2014-06", datetime(2014, 6, 1), UserWarning),
+            ("2014-06", datetime(2014, 6, 1), None),
             ("06-2014", datetime(2014, 6, 1), UserWarning),
-            ("2014-6", datetime(2014, 6, 1), UserWarning),
+            ("2014-6", datetime(2014, 6, 1), None),
             ("6-2014", datetime(2014, 6, 1), UserWarning),
             ("20010101 12", datetime(2001, 1, 1, 12), None),
             ("20010101 1234", datetime(2001, 1, 1, 12, 34), UserWarning),

From 575b215ca339536fdda63bc6d41cafadf6bf926b Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 29 Oct 2022 14:41:27 +0100
Subject: [PATCH 13/34] :white_check_mark: fixup tests based on now and today
 parsing

---
 pandas/tests/groupby/transform/test_transform.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index b0858cbcf67d5..2b4eba539ec82 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1052,8 +1052,7 @@ def demean_rename(x):
 @pytest.mark.parametrize("func", [min, max, np.min, np.max, "first", "last"])
 def test_groupby_transform_timezone_column(func):
     # GH 24198
-    with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
-        ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
+    ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
     result = DataFrame({"end_time": [ts], "id": [1]})
     result["max_end_time"] = result.groupby("id").end_time.transform(func)
     expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"])

From 1d255e07a3278576c3d35eec961a9d8db97d626f Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Thu, 17 Nov 2022 13:30:18 +0000
Subject: [PATCH 14/34] fixup after merge

---
 pandas/core/tools/datetimes.py         | 9 ++-------
 pandas/tests/tools/test_to_datetime.py | 2 +-
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 070ddbb87a553..02988754450a6 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -429,13 +429,8 @@ def _convert_listlike_datetimes(
     if format is None:
         format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
 
-    # There is a special fast-path for iso8601 formatted
-    # datetime strings, so in those cases don't use the inferred
-    # format because this path makes process slower in this
-    # special case
-    if format is not None and format_is_iso(format):
-        require_iso8601 = True
-        format = None
+    # There is a special fast-path for iso8601 formatted datetime strings
+    require_iso8601 = format is not None and format_is_iso(format)
 
     if format is not None and not require_iso8601:
         return _to_datetime_with_format(
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 5b21ee316e5da..99899dacc68df 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -2333,7 +2333,7 @@ class TestDatetimeParsingWrappers:
             ("00q4", datetime(2000, 10, 1), UserWarning),
             ("2005", datetime(2005, 1, 1), None),
             ("2005-11", datetime(2005, 11, 1), None),
-            ("2005 11", datetime(2005, 11, 1), None),
+            ("2005 11", datetime(2005, 11, 1), UserWarning),
             ("11-2005", datetime(2005, 11, 1), UserWarning),
             ("11 2005", datetime(2005, 11, 1), UserWarning),
             ("200511", datetime(2020, 5, 11), UserWarning),

From 285b1ff8dfa93b60be32280183f71811797729de Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Thu, 17 Nov 2022 17:58:12 +0000
Subject: [PATCH 15/34] fixup after merge

---
 pandas/tests/io/excel/test_readers.py  | 13 +++----------
 pandas/tests/tools/test_to_datetime.py | 24 ------------------------
 2 files changed, 3 insertions(+), 34 deletions(-)

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 8e92fa10049e0..bff4c98fe2842 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -943,18 +943,11 @@ def test_reader_seconds(self, request, engine, read_ext):
                 ]
             }
         )
-        if engine == "odf":
-            # odf recognises cell type as time (from its attribute)
-            # so tries to parse it.
-            warning = UserWarning
-        else:
-            warning = None
-        with tm.assert_produces_warning(warning, match="Could not infer format"):
-            actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
+
+        actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
-        with tm.assert_produces_warning(warning, match="Could not infer format"):
-            actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
+        actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
     def test_read_excel_multiindex(self, request, read_ext):
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 0c713d3f4267b..a6737b3b8fb3e 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -2898,30 +2898,6 @@ def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length):
         to_datetime(s, errors="raise", utc=True)
 
 
-@pytest.mark.parametrize(
-    "arg",
-    [
-        ["1724-12-20 20:20:20+00:00", "2022-01-01 00:00:00"],
-        [
-            Timestamp("1724-12-20 20:20:20+00:00"),
-            Timestamp("2022-01-01 00:00:00"),
-        ],
-        [datetime(1724, 12, 20, 20, 20, 20, tzinfo=timezone.utc), datetime(2022, 1, 1)],
-    ],
-    ids=["string", "pd.Timestamp", "datetime.datetime"],
-)
-@pytest.mark.parametrize("tz_aware_first", [True, False])
-def test_to_datetime_mixed_tzaware_timestamp_utc_true(arg, tz_aware_first):
-    # GH 48678
-    exp_arg = ["1724-12-20 20:20:20", "2022-01-01 00:00:00"]
-    if not tz_aware_first:
-        arg.reverse()
-        exp_arg.reverse()
-    result = to_datetime(arg, utc=True)
-    expected = DatetimeIndex(exp_arg).tz_localize("UTC")
-    tm.assert_index_equal(result, expected)
-
-
 def test_to_datetime_format_f_parse_nanos():
     # GH 48767
     timestamp = "15/02/2020 02:03:04.123456789"

From 963b62bf510b29f4a9f0aff2e011cf3a6b15d943 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Thu, 17 Nov 2022 19:30:51 +0000
Subject: [PATCH 16/34] fixup test

---
 pandas/tests/io/parser/dtypes/test_categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py
index 3b8c520004f12..a0deebecdfff8 100644
--- a/pandas/tests/io/parser/dtypes/test_categorical.py
+++ b/pandas/tests/io/parser/dtypes/test_categorical.py
@@ -262,7 +262,7 @@ def test_categorical_coerces_timestamp(all_parsers):
     parser = all_parsers
     dtype = {"b": CategoricalDtype([Timestamp("2014")])}
 
-    data = "b\n2014-01-01\n2014-01-01T00:00:00"
+    data = "b\n2014-01-01\n2014-01-01"
     expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)})
 
     result = parser.read_csv(StringIO(data), dtype=dtype)

From c90a8a525f35fd6a4ebee81d319c2fc662a01a68 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Thu, 17 Nov 2022 19:34:03 +0000
Subject: [PATCH 17/34] remove outdated doctest

---
 pandas/core/tools/datetimes.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 02988754450a6..0f4fd77e87e1e 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -964,7 +964,7 @@ def to_datetime(
 
     - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`:
 
-    >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15'])
+    >>> pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15'])
     DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'],
                   dtype='datetime64[ns]', freq=None)
 
@@ -1011,19 +1011,6 @@ def to_datetime(
     ...                utc=True)
     DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'],
                   dtype='datetime64[ns, UTC]', freq=None)
-
-    - Inputs can contain both naive and aware, string or datetime, the above
-      rules still apply
-
-    >>> from datetime import timezone, timedelta
-    >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 12:00 -0530',
-    ...                datetime(2020, 1, 1, 18),
-    ...                datetime(2020, 1, 1, 18,
-    ...                tzinfo=timezone(-timedelta(hours=1)))],
-    ...                utc=True)
-    DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 17:30:00+00:00',
-                   '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
-                  dtype='datetime64[ns, UTC]', freq=None)
     """
     if infer_datetime_format is not lib.no_default:
         warnings.warn(

From cdfa355b1f02adf6c51f78723bb2ce4c3ecbaa6c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 19 Nov 2022 11:25:15 +0000
Subject: [PATCH 18/34] xfail test based on issue 49767

---
 pandas/tests/extension/test_arrow.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index d094a7731c417..0dfc9cd14562a 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -664,6 +664,14 @@ def test_EA_types(self, engine, data, request):
                     reason=f"Parameterized types with tz={pa_dtype.tz} not supported.",
                 )
             )
+        elif pa.types.is_timestamp(pa_dtype):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    raises=ValueError,
+                    strict=False,
+                    reason="https://github.com/pandas-dev/pandas/issues/49767",
+                )
+            )
         elif pa.types.is_binary(pa_dtype):
             request.node.add_marker(
                 pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")

From 5755032ea606db1d492cccb3dd25844d3e3ee4df Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Fri, 2 Dec 2022 14:47:18 +0000
Subject: [PATCH 19/34] wip

---
 pandas/tests/tools/test_to_datetime.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index f3f487e488848..bd3506a5fece5 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -352,6 +352,7 @@ def test_to_datetime_with_non_exact(self, cache):
         ],
     )
     def test_parse_nanoseconds_with_formula(self, cache, arg):
+
         # GH8989
         # truncating the nanoseconds when a format was provided
         expected = to_datetime(arg, cache=cache)
@@ -468,12 +469,11 @@ def test_to_datetime_parse_timezone_keeps_name(self):
 class TestToDatetime:
     def test_to_datetime_mixed_datetime_and_string(self):
         # GH#47018 adapted old doctest with new behavior
-        d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
-        with pytest.raises(
-            ValueError,
-            match=r"time data '.*' does not match format '%Y-%m-%d %H:%M %z' \(match\)",
-        ):
-            to_datetime(["2020-01-01 17:00 -0100", d2])
+        d1 = datetime(2020, 1, 1, 17, tzinfo=pytz.FixedOffset(-60))
+        d2 = datetime(2020, 1, 1, 18, tzinfo=pytz.FixedOffset(-60))
+        res = to_datetime(["2020-01-01 17:00 -0100", d2])
+        expected = to_datetime([d1, d2])
+        tm.assert_index_equal(res, expected)
 
     @pytest.mark.parametrize(
         "fmt",
@@ -1145,7 +1145,8 @@ def test_to_datetime_cache_scalar(self):
                 (None,)
                 + (NaT,) * start_caching_at
                 + ("2012 July 26", Timestamp("2012-07-26")),
-                (NaT,) * (start_caching_at + 1) + (Timestamp("2012-07-26"), NaT),
+                (NaT,) * (start_caching_at + 1)
+                + (Timestamp("2012-07-26"), Timestamp("2012-07-26")),
             ),
         ),
     )

From 0a86705c3572d6f72a4532b27b37a7f038c41e8f Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sat, 3 Dec 2022 12:50:08 +0000
Subject: [PATCH 20/34] add back examples of formats which can be guessed

---
 doc/source/user_guide/io.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index c38b7d8418d60..330e42e27ea7d 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -970,6 +970,19 @@ To parse the mixed-timezone values as a datetime column, pass a partially-applie
 Inferring datetime format
 +++++++++++++++++++++++++
 
+Here are some examples of datetime strings that can be guessed (all
+representing December 30th, 2011 at 00:00:00):
+* "20111230"
+* "2011/12/30"
+* "20111230 00:00:00"
+* "12/30/2011 00:00:00"
+* "30/Dec/2011 00:00:00"
+* "30/December/2011 00:00:00"
+
+Note that format inference is sensitive to ``dayfirst``.  With
+``dayfirst=True``, it will guess "01/12/2011" to be December 1st. With
+``dayfirst=False`` (default) it will guess "01/12/2011" to be January 12th.
+
 If you try to parse a column of date strings, pandas will attempt to guess the format
 from the first non-NaN element, and will then parse the rest of the column with that
 format. If pandas fails to guess the format, then a warning will be raised, and each

From 86e9bcfe27df1686a4106385da73daf3f6536689 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 09:39:19 +0000
Subject: [PATCH 21/34] start fixing up

---
 doc/source/user_guide/io.rst         |  1 +
 doc/source/user_guide/timeseries.rst | 11 +++++++++++
 pandas/core/tools/datetimes.py       |  9 ++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 327fd8cd35956..81e7a7e3e9abf 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -970,6 +970,7 @@ Inferring datetime format
 
 Here are some examples of datetime strings that can be guessed (all
 representing December 30th, 2011 at 00:00:00):
+
 * "20111230"
 * "2011/12/30"
 * "20111230 00:00:00"
diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
index 100ed1a889131..197a3e19b94f5 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -13,6 +13,17 @@ a tremendous amount of new functionality for manipulating time series data.
 
 For example, pandas supports:
 
+Parsing time series information from various sources and formats
+
+.. ipython:: python
+
+   import datetime
+
+   dti = pd.to_datetime(
+       ["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)]
+   )
+   dti
+
 Generate sequences of fixed-frequency dates and time spans
 
 .. ipython:: python
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 44118725db84b..d82cb1ff571fd 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -993,7 +993,7 @@ def to_datetime(
       are constant:
 
     >>> from datetime import datetime
-    >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)])
+    >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
     DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
                   dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
 
@@ -1014,6 +1014,13 @@ def to_datetime(
     ...                utc=True)
     DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'],
                   dtype='datetime64[ns, UTC]', freq=None)
+
+    - Inputs can contain both string or datetime, the above
+      rules still apply
+    >>> from datetime import timezone, timedelta
+    >>> pd.to_datetime(['2018-10-26 12:00', datetime(2020, 1, 1, 18)], utc=True)
+    DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'],
+                  dtype='datetime64[ns, UTC]', freq=None)
     """
     if infer_datetime_format is not lib.no_default:
         warnings.warn(

From f92a8cb8259f04a57ae178f71d74130e2b8b3b7c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 10:32:52 +0000
Subject: [PATCH 22/34] fixups from reviews

---
 doc/source/whatsnew/v2.0.0.rst         |  2 +-
 pandas/core/tools/datetimes.py         |  2 +-
 pandas/tests/apply/test_frame_apply.py |  3 +--
 pandas/tests/extension/test_arrow.py   |  3 +--
 pandas/tests/tools/test_to_datetime.py | 21 ++++++++-------------
 5 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index e90d7b1e61d7b..79078aebceeb4 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -409,7 +409,7 @@ Other API changes
 
 Deprecations
 ~~~~~~~~~~~~
--
+- Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index d82cb1ff571fd..3064dcd7ac7ad 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -993,7 +993,7 @@ def to_datetime(
       are constant:
 
     >>> from datetime import datetime
-    >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
+    >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)])
     DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
                   dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
 
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index aa65fa5b29034..e7c2618d388c2 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -836,8 +836,7 @@ def test_with_dictlike_columns_with_datetime():
     df["author"] = ["X", "Y", "Z"]
     df["publisher"] = ["BBC", "NBC", "N24"]
     df["date"] = pd.to_datetime(
-        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"],
-        dayfirst=True,
+        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"]
     )
     result = df.apply(lambda x: {}, axis=1)
     expected = Series([{}, {}, {}])
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 3b2ef95a6c320..3d34b304a2588 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -692,11 +692,10 @@ def test_EA_types(self, engine, data, request):
                     reason=f"Parameterized types with tz={pa_dtype.tz} not supported.",
                 )
             )
-        elif pa.types.is_timestamp(pa_dtype):
+        elif pa.types.is_timestamp(pa_dtype) and pa_dtype.unit in ("us", "ns"):
             request.node.add_marker(
                 pytest.mark.xfail(
                     raises=ValueError,
-                    strict=False,
                     reason="https://github.com/pandas-dev/pandas/issues/49767",
                 )
             )
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 18eb20de03f03..2ed415b4613ad 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -469,10 +469,10 @@ def test_to_datetime_parse_timezone_keeps_name(self):
 class TestToDatetime:
     def test_to_datetime_mixed_datetime_and_string(self):
         # GH#47018 adapted old doctest with new behavior
-        d1 = datetime(2020, 1, 1, 17, tzinfo=pytz.FixedOffset(-60))
-        d2 = datetime(2020, 1, 1, 18, tzinfo=pytz.FixedOffset(-60))
-        res = to_datetime(["2020-01-01 17:00 -0100", d2])
-        expected = to_datetime([d1, d2])
+        d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
+        d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
+        res = to_datetime(["2020-01-01 17:00:00-01:00", d2])
+        expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60))
         tm.assert_index_equal(res, expected)
 
     @pytest.mark.parametrize(
@@ -1335,10 +1335,7 @@ def test_mixed_offsets_with_native_datetime_raises(self):
         tm.assert_series_equal(mixed, expected)
 
         with pytest.raises(ValueError, match="Tz-aware datetime.datetime"):
-            with tm.assert_produces_warning(
-                UserWarning, match="Could not infer format"
-            ):
-                to_datetime(mixed)
+            to_datetime(mixed)
 
     def test_non_iso_strings_with_tz_offset(self):
         result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2)
@@ -2304,13 +2301,11 @@ def test_to_datetime_infer_datetime_format_consistent_format(
         s_as_dt_strings = ser.apply(lambda x: x.strftime(test_format))
 
         with_format = to_datetime(s_as_dt_strings, format=test_format, cache=cache)
-        no_infer = to_datetime(s_as_dt_strings, cache=cache)
-        yes_infer = to_datetime(s_as_dt_strings, cache=cache)
+        without_format = to_datetime(s_as_dt_strings, cache=cache)
 
-        # Whether the format is explicitly passed, it is inferred, or
+        # Whether the format is explicitly passed, or
         # it is not inferred, the results should all be the same
-        tm.assert_series_equal(with_format, no_infer)
-        tm.assert_series_equal(no_infer, yes_infer)
+        tm.assert_series_equal(with_format, without_format)
 
     @pytest.mark.parametrize(
         "tz_name, offset, warning",

From fd215df755004b3251c8d200b1a85aee526d11fa Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 10:40:21 +0000
Subject: [PATCH 23/34] lint

---
 pandas/_libs/tslibs/parsing.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index c0efb48562780..c525ed6ba496e 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1075,8 +1075,8 @@ cdef str _fill_token(token: str, padding: int):
 cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
     """Warn if guessed datetime format doesn't respect dayfirst argument."""
     cdef:
-        int day_index = format.find('%d')
-        int month_index = format.find('%m')
+        int day_index = format.find("%d")
+        int month_index = format.find("%m")
 
     if (day_index != -1) and (month_index != -1):
         if (day_index > month_index) and dayfirst:

From 0a5c466381ce5f9748a8ed1401f097c49c260d7f Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 10:49:12 +0000
Subject: [PATCH 24/34] put tests back

---
 pandas/tests/tools/test_to_datetime.py | 48 ++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 2ed415b4613ad..33e45932544d6 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -2307,6 +2307,54 @@ def test_to_datetime_infer_datetime_format_consistent_format(
         # it is not inferred, the results should all be the same
         tm.assert_series_equal(with_format, without_format)
 
+    def test_to_datetime_inconsistent_format(self, cache):
+        data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"]
+        ser = Series(np.array(data))
+        with pytest.raises(ValueError, match="does not match format"):
+            to_datetime(ser, cache=cache)
+
+    def test_to_datetime_consistent_format(self, cache):
+        data = ["Jan/01/2011", "Feb/01/2011", "Mar/01/2011"]
+        ser = Series(np.array(data))
+        result = to_datetime(ser, cache=cache)
+        expected = Series(
+            ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[ns]"
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_to_datetime_series_with_nans(self, cache):
+        ser = Series(
+            np.array(
+                ["01/01/2011 00:00:00", np.nan, "01/03/2011 00:00:00", np.nan],
+                dtype=object,
+            )
+        )
+        result = to_datetime(ser, cache=cache)
+        expected = Series(
+            ["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[ns]"
+        )
+        tm.assert_series_equal(result, expected)
+
+    def test_to_datetime_series_start_with_nans(self, cache):
+        ser = Series(
+            np.array(
+                [
+                    np.nan,
+                    np.nan,
+                    "01/01/2011 00:00:00",
+                    "01/02/2011 00:00:00",
+                    "01/03/2011 00:00:00",
+                ],
+                dtype=object,
+            )
+        )
+
+        result = to_datetime(ser, cache=cache)
+        expected = Series(
+            [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[ns]"
+        )
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize(
         "tz_name, offset, warning",
         [("UTC", 0, None), ("UTC-3", 180, UserWarning), ("UTC+3", -180, UserWarning)],

From 772dd6c457e8a656abc928dac447c39cb3b15abd Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 11:28:34 +0000
Subject: [PATCH 25/34] shorten diff

---
 doc/source/user_guide/timeseries.rst | 2 +-
 pandas/core/tools/datetimes.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
index 197a3e19b94f5..74536eb975e70 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -766,7 +766,7 @@ partially matching dates:
    rng2 = pd.date_range("2011-01-01", "2012-01-01", freq="W")
    ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)
 
-   ts2.truncate(before="2011-11-01", after="2011-12-01")
+   ts2.truncate(before="2011-11", after="2011-12")
    ts2["2011-11":"2011-12"]
 
 Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 3064dcd7ac7ad..c85b2f4987834 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -1017,7 +1017,7 @@ def to_datetime(
 
     - Inputs can contain both string or datetime, the above
       rules still apply
-    >>> from datetime import timezone, timedelta
+
     >>> pd.to_datetime(['2018-10-26 12:00', datetime(2020, 1, 1, 18)], utc=True)
     DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'],
                   dtype='datetime64[ns, UTC]', freq=None)

From b49b7cf186b889030fce5772c9d9bb06a66060ef Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 13:39:39 +0000
Subject: [PATCH 26/34] add example of string which cannot be guessed

---
 doc/source/user_guide/io.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 81e7a7e3e9abf..0d27dda3bb8ff 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -984,8 +984,9 @@ Note that format inference is sensitive to ``dayfirst``.  With
 
 If you try to parse a column of date strings, pandas will attempt to guess the format
 from the first non-NaN element, and will then parse the rest of the column with that
-format. If pandas fails to guess the format, then a warning will be raised, and each
-row will have its format guessed individually by ``dateutil.parser.parse``. The safest
+format. If pandas fails to guess the format (for example if your first string is
+``'01 December US/Pacific 2000'``), then a warning will be raised and each
+row will be parsed individually by ``dateutil.parser.parse``. The safest
 way to parse dates is to explicitly set ``format=``.
 
 .. ipython:: python

From d17d8195fa6a3a5edbe546ed0445b9d3a9089c60 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 16:30:48 +0000
Subject: [PATCH 27/34] add deprecated directive, construct expected
 explicitly, explicit UserWarning, reword row-wise and column-wise

---
 doc/source/whatsnew/v2.0.0.rst         |  2 +-
 pandas/core/tools/datetimes.py         |  8 ++++++--
 pandas/io/parsers/readers.py           |  9 +++++++++
 pandas/tests/tools/test_to_datetime.py | 12 ++++++++----
 4 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index a09828fe24275..30efed2ecdb64 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -345,7 +345,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
 Datetimes are now parsed with a consistent format
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-In the past, :func:`to_datetime` guessed the format for each element independently. This was appropriate for some cases where a column had a mixed date format - however, it would regularly cause problems for columns where users expected a consistent format but the function would switch formats row-wise. As of version 2.0.0, this behavior is consistent column-wise, and the format is determined by the first non-NA value in the column (unless the user specifies a format, in which case that is used).
+In the past, :func:`to_datetime` guessed the format for each element independently. This was appropriate for some cases where elements had mixed date formats - however, it would regularly cause problems when users expected a consistent format but the function would switch formats between elements. As of version 2.0.0, parsing will use a consistent format, determined by the first non-NA value (unless the user specifies a format, in which case that is used).
 
 *Old behavior*:
 
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index c85b2f4987834..27f58aab93f87 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -139,6 +139,7 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str
             warnings.warn(
                 "Could not infer format - "
                 "to ensure consistent parsing, specify a format.",
+                UserWarning,
                 stacklevel=find_stack_level(),
             )
     return None
@@ -371,8 +372,6 @@ def _convert_listlike_datetimes(
         None or string of the frequency of the passed data
     errors : str
         error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore'
-    infer_datetime_format : bool, default False
-        inferring format behavior from to_datetime
     dayfirst : bool
         dayfirst parsing behavior from to_datetime
     yearfirst : bool
@@ -804,6 +803,11 @@ def to_datetime(
         of the datetime strings based on the first non-NaN element,
         and if it can be inferred, switch to a faster method of parsing them.
         In some cases this can increase the parsing speed by ~5-10x.
+
+        .. deprecated:: 2.0.0
+            A strict version of this argument is now the default; passing it has
+            no effect.
+
     origin : scalar, default 'unix'
         Define the reference date. The numeric values would be parsed as number
         of units (defined by `unit`) since this reference date.
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 67c9391704c0d..c73cdcf1f847c 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -242,6 +242,15 @@
     :ref:`io.csv.mixed_timezones` for more.
 
     Note: A fast-path exists for iso8601-formatted dates.
+infer_datetime_format : bool, default False
+    If True and `parse_dates` is enabled, pandas will attempt to infer the
+    format of the datetime strings in the columns, and if it can be inferred,
+    switch to a faster method of parsing them. In some cases this can increase
+    the parsing speed by 5-10x.
+
+    .. deprecated:: 2.0.0
+        A strict version of this argument is now the default; passing it has no effect.
+
 keep_date_col : bool, default False
     If True and `parse_dates` specifies combining multiple columns then
     keep the original columns.
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 33e45932544d6..a0d66942de533 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -469,10 +469,14 @@ def test_to_datetime_parse_timezone_keeps_name(self):
 class TestToDatetime:
     def test_to_datetime_mixed_datetime_and_string(self):
         # GH#47018 adapted old doctest with new behavior
-        d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
-        d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
-        res = to_datetime(["2020-01-01 17:00:00-01:00", d2])
-        expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60))
+        py_dt = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
+        res = to_datetime(["2020-01-01 17:00 -0100", py_dt])
+        expected = Index(
+            [
+                Timestamp("2020-01-01 17:00:00-0100", tz=pytz.FixedOffset(-60)),
+                Timestamp("2020-01-01 18:00:00-0100", tz="UTC-01:00"),
+            ],
+        )
         tm.assert_index_equal(res, expected)
 
     @pytest.mark.parametrize(

From f4520e9816ebda411a8ec813f450356745fefc4c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 17:26:36 +0000
Subject: [PATCH 28/34] remove redundant example

---
 doc/source/user_guide/timeseries.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
index 74536eb975e70..1b7acc12f0dcb 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -208,7 +208,6 @@ you can pass the ``dayfirst`` flag:
 .. ipython:: python
     :okwarning:
 
-    pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)
     pd.to_datetime(["04-14-2012 10:00"], dayfirst=True)
 
 .. warning::

From fcb515f8daa61c2c501a3de6cd475dd82b5e6d22 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 6 Dec 2022 17:34:33 +0000
Subject: [PATCH 29/34] restore newline

---
 doc/source/user_guide/timeseries.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
index 1b7acc12f0dcb..6f9fec0ff81b9 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -208,6 +208,8 @@ you can pass the ``dayfirst`` flag:
 .. ipython:: python
     :okwarning:
 
+    pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)
+
     pd.to_datetime(["04-14-2012 10:00"], dayfirst=True)
 
 .. warning::

From 22156529156f542d8bc8c6fc7545976d3ff24c0a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Fri, 9 Dec 2022 08:45:05 +0000
Subject: [PATCH 30/34] double backticks around False, explicitly raise
 UserWarning

---
 doc/source/user_guide/timeseries.rst | 2 +-
 pandas/_libs/tslibs/parsing.pyx      | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
index 6f9fec0ff81b9..7e1368061322b 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -216,7 +216,7 @@ you can pass the ``dayfirst`` flag:
 
    You see in the above example that ``dayfirst`` isn't strict. If a date
    can't be parsed with the day being first it will be parsed as if
-   ``dayfirst`` were False and a warning will also be raised.
+   ``dayfirst`` were ``False`` and a warning will also be raised.
 
 If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``.
 ``Timestamp`` can also accept string input, but it doesn't accept string parsing
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index cefab6256ce5d..1d5916a98922a 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1083,12 +1083,14 @@ cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
             warnings.warn(
                 f"Parsing dates in {format} format when dayfirst=True was specified. "
                 "Pass `dayfirst=False` or specify a format to silence this warning.",
+                UserWarning,
                 stacklevel=find_stack_level(),
             )
         if (day_index < month_index) and not dayfirst:
             warnings.warn(
                 f"Parsing dates in {format} format when dayfirst=False was specified. "
                 "Pass `dayfirst=True` or specify a format to silence this warning.",
+                UserWarning,
                 stacklevel=find_stack_level(),
             )
 

From 7d11f593b9ea863b5ba4750fb677299ab2aea8ca Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Mon, 12 Dec 2022 17:47:18 +0000
Subject: [PATCH 31/34] reword warning

---
 pandas/core/tools/datetimes.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 9138bfed5679a..2b63836119895 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -137,8 +137,9 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str
             if guessed_format is not None:
                 return guessed_format
             warnings.warn(
-                "Could not infer format - "
-                "to ensure consistent parsing, specify a format.",
+                "Could not infer format, so each element will be parsed "
+                "individually by `dateutil`. To ensure parsing is "
+                "consistent and as-expected, please specify a format.",
                 UserWarning,
                 stacklevel=find_stack_level(),
             )

From f0ac4585f623a41299a15ca74c7930563ee08286 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Mon, 12 Dec 2022 18:34:49 +0000
Subject: [PATCH 32/34] test both dayfirst True and False

---
 pandas/tests/tslibs/test_parsing.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index 4b6f899f1fbae..a4c79e77d2eed 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -238,19 +238,30 @@ def test_guess_datetime_format_wrong_type_inputs(invalid_type_dt):
 
 
 @pytest.mark.parametrize(
-    "string,fmt,dayfirst",
+    "string,fmt,dayfirst,warning",
     [
-        ("2011-1-1", "%Y-%m-%d", False),
-        ("1/1/2011", "%m/%d/%Y", False),
-        ("30-1-2011", "%d-%m-%Y", True),
-        ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S", False),
-        ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S", False),
-        ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S", False),
+        ("2011-1-1", "%Y-%m-%d", False, None),
+        ("2011-1-1", "%Y-%d-%m", True, None),
+        ("1/1/2011", "%m/%d/%Y", False, None),
+        ("1/1/2011", "%d/%m/%Y", True, None),
+        ("30-1-2011", "%d-%m-%Y", False, UserWarning),
+        ("30-1-2011", "%d-%m-%Y", True, None),
+        ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S", False, None),
+        ("2011-1-1 0:0:0", "%Y-%d-%m %H:%M:%S", True, None),
+        ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S", False, None),
+        ("2011-1-3T00:00:0", "%Y-%d-%mT%H:%M:%S", True, None),
+        ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S", False, None),
+        ("2011-1-1 00:00:00", "%Y-%d-%m %H:%M:%S", True, None),
     ],
 )
-def test_guess_datetime_format_no_padding(string, fmt, dayfirst):
+def test_guess_datetime_format_no_padding(string, fmt, dayfirst, warning):
     # see gh-11142
-    result = parsing.guess_datetime_format(string, dayfirst=dayfirst)
+    msg = (
+        f"Parsing dates in {fmt} format when dayfirst=False was specified. "
+        "Pass `dayfirst=True` or specify a format to silence this warning."
+    )
+    with tm.assert_produces_warning(warning, match=msg):
+        result = parsing.guess_datetime_format(string, dayfirst=dayfirst)
     assert result == fmt
 
 

From 4a5dd1cdb927fb2279e7e5033a3bb33a6d8fa7ad Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 13 Dec 2022 08:24:08 +0000
Subject: [PATCH 33/34] postmerge fixup

---
 pandas/io/parsers/readers.py           | 2 --
 pandas/tests/tools/test_to_datetime.py | 3 ++-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index c73cdcf1f847c..96c2fd08bbc59 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -1749,8 +1749,6 @@ def TextParser(*args, **kwds) -> TextFileReader:
         transformed content.
     encoding : str, optional
         Encoding to use for UTF when reading/writing (ex. 'utf-8')
-    squeeze : bool, default False
-        returns Series if only one column.
     float_precision : str, optional
         Specifies which converter the C engine should use for floating-point
         values. The options are `None` or `high` for the ordinary converter,
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index f4cf069046b1d..1ad9324e93406 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -2387,7 +2387,8 @@ def test_to_datetime_series_start_with_nans(self, cache):
     def test_infer_datetime_format_tz_name(self, tz_name, offset, warning):
         # GH 33133
         ser = Series([f"2019-02-02 08:07:13 {tz_name}"])
-        result = to_datetime(ser)
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result = to_datetime(ser)
         tz = timezone(timedelta(minutes=offset))
         expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)])
         tm.assert_series_equal(result, expected)

From 917b31b85c2be25b16269a53aa2815ee949ab04c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 13 Dec 2022 09:05:14 +0000
Subject: [PATCH 34/34] unimportant typo to restart CI

---
 pandas/tests/tools/test_to_datetime.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 1ad9324e93406..48844beed30f4 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -2329,7 +2329,7 @@ def test_to_datetime_infer_datetime_format_consistent_format(
         without_format = to_datetime(s_as_dt_strings, cache=cache)
 
         # Whether the format is explicitly passed, or
-        # it is not inferred, the results should all be the same
+        # it is inferred, the results should all be the same
         tm.assert_series_equal(with_format, without_format)
 
     def test_to_datetime_inconsistent_format(self, cache):