From 1104a9276b40233710d5ba8a9d790e57d4cdcd4f Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 08:41:08 +0100
Subject: [PATCH 01/12] :wastebasket: deprecate infer_datetime_format, make
 strict

---
 pandas/core/tools/datetimes.py   | 92 ++++++++++++--------------------
 pandas/io/parsers/base_parser.py |  5 --
 pandas/io/parsers/readers.py     | 39 +++++++-------
 3 files changed, 53 insertions(+), 83 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 7791ea804a52a..5760952ba7324 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -19,7 +19,10 @@
 
 import numpy as np
 
-from pandas._libs import tslib
+from pandas._libs import (
+    lib,
+    tslib,
+)
 from pandas._libs.tslibs import (
     OutOfBoundsDatetime,
     Timedelta,
@@ -331,7 +334,6 @@ def _convert_listlike_datetimes(
     tz: Timezone | None = None,
     unit: str | None = None,
     errors: DateTimeErrorChoices = "raise",
-    infer_datetime_format: bool = False,
     dayfirst: bool | None = None,
     yearfirst: bool | None = None,
     exact: bool = True,
@@ -415,27 +417,19 @@ def _convert_listlike_datetimes(
     arg = ensure_object(arg)
     require_iso8601 = False
 
-    if infer_datetime_format and format is None:
+    if format is None:
         format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
 
+    # There is a special fast-path for iso8601 formatted
+    # datetime strings, so in those cases don't use the inferred
+    # format, because parsing with an explicit format is slower
+    # than the fast-path in this special case
+    if format is not None and format_is_iso(format):
+        require_iso8601 = True
+        format = None
     if format is not None:
-        # There is a special fast-path for iso8601 formatted
-        # datetime strings, so in those cases don't use the inferred
-        # format because this path makes process slower in this
-        # special case
-        format_is_iso8601 = format_is_iso(format)
-        if format_is_iso8601:
-            require_iso8601 = not infer_datetime_format
-            format = None
-
-    if format is not None:
-        res = _to_datetime_with_format(
-            arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
-        )
-        if res is not None:
-            return res
+        return _to_datetime_with_format(arg, orig_arg, name, tz, format, exact, errors)
 
-    assert format is None or infer_datetime_format
     utc = tz == "utc"
     result, tz_parsed = objects_to_datetime64ns(
         arg,
@@ -464,8 +458,7 @@ def _array_strptime_with_fallback(
     fmt: str,
     exact: bool,
     errors: str,
-    infer_datetime_format: bool,
-) -> Index | None:
+) -> Index:
     """
     Call array_strptime, with fallback behavior depending on 'errors'.
     """
@@ -486,18 +479,14 @@ def _array_strptime_with_fallback(
         # if fmt was inferred, try falling back
         # to array_to_datetime - terminate here
         # for specified formats
-        if not infer_datetime_format:
-            if errors == "raise":
-                raise
-            elif errors == "coerce":
-                result = np.empty(arg.shape, dtype="M8[ns]")
-                iresult = result.view("i8")
-                iresult.fill(iNaT)
-            else:
-                result = arg
+        if errors == "raise":
+            raise
+        elif errors == "coerce":
+            result = np.empty(arg.shape, dtype="M8[ns]")
+            iresult = result.view("i8")
+            iresult.fill(iNaT)
         else:
-            # Indicates to the caller to fallback to objects_to_datetime64ns
-            return None
+            result = arg
     else:
         if "%Z" in fmt or "%z" in fmt:
             return _return_parsed_timezone_results(result, timezones, tz, name)
@@ -513,10 +502,9 @@ def _to_datetime_with_format(
     fmt: str,
     exact: bool,
     errors: str,
-    infer_datetime_format: bool,
-) -> Index | None:
+) -> Index:
     """
-    Try parsing with the given format, returning None on failure.
+    Try parsing with the given format.
     """
     result = None
 
@@ -537,9 +525,7 @@ def _to_datetime_with_format(
             return _box_as_indexlike(result, utc=utc, name=name)
 
     # fallback
-    res = _array_strptime_with_fallback(
-        arg, name, tz, fmt, exact, errors, infer_datetime_format
-    )
+    res = _array_strptime_with_fallback(arg, name, tz, fmt, exact, errors)
     return res
 
 
@@ -713,7 +699,7 @@ def to_datetime(
     format: str | None = None,
     exact: bool = True,
     unit: str | None = None,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: lib.NoDefault | bool = lib.no_default,
     origin: str = "unix",
     cache: bool = True,
 ) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None:
@@ -926,24 +912,6 @@ def to_datetime(
     1   2016-03-05
     dtype: datetime64[ns]
 
-    Passing ``infer_datetime_format=True`` can often-times speedup a parsing
-    if its not an ISO8601 format exactly, but in a regular format.
-
-    >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
-    >>> s.head()
-    0    3/11/2000
-    1    3/12/2000
-    2    3/13/2000
-    3    3/11/2000
-    4    3/12/2000
-    dtype: object
-
-    >>> %timeit pd.to_datetime(s, infer_datetime_format=True)  # doctest: +SKIP
-    100 loops, best of 3: 10.4 ms per loop
-
-    >>> %timeit pd.to_datetime(s, infer_datetime_format=False)  # doctest: +SKIP
-    1 loop, best of 3: 471 ms per loop
-
     Using a unix epoch time
 
     >>> pd.to_datetime(1490195805, unit='s')
@@ -1060,6 +1028,15 @@ def to_datetime(
                    '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
                   dtype='datetime64[ns, UTC]', freq=None)
     """
+    if infer_datetime_format is not lib.no_default:
+        warnings.warn(
+            "The argument 'infer_datetime_format' is deprecated and will "
+            "be removed in a future version. "
+            "A strict version of it is now the default, see "
+            "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
+            "You can safely remove this argument.",
+            stacklevel=find_stack_level(),
+        )
     if arg is None:
         return None
 
@@ -1075,7 +1052,6 @@ def to_datetime(
         yearfirst=yearfirst,
         errors=errors,
         exact=exact,
-        infer_datetime_format=infer_datetime_format,
     )
 
     result: Timestamp | NaTType | Series | Index
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 45f6469a31f4f..5080c15153ced 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -122,13 +122,11 @@ def __init__(self, kwds) -> None:
         self.true_values = kwds.get("true_values")
         self.false_values = kwds.get("false_values")
         self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True)
-        self.infer_datetime_format = kwds.pop("infer_datetime_format", False)
         self.cache_dates = kwds.pop("cache_dates", True)
 
         self._date_conv = _make_date_converter(
             date_parser=self.date_parser,
             dayfirst=self.dayfirst,
-            infer_datetime_format=self.infer_datetime_format,
             cache_dates=self.cache_dates,
         )
 
@@ -1105,7 +1103,6 @@ def _get_empty_meta(
 def _make_date_converter(
     date_parser=None,
     dayfirst: bool = False,
-    infer_datetime_format: bool = False,
     cache_dates: bool = True,
 ):
     def converter(*date_cols):
@@ -1118,7 +1115,6 @@ def converter(*date_cols):
                     utc=None,
                     dayfirst=dayfirst,
                     errors="ignore",
-                    infer_datetime_format=infer_datetime_format,
                     cache=cache_dates,
                 ).to_numpy()
 
@@ -1188,7 +1184,6 @@ def converter(*date_cols):
     "squeeze": None,
     "compression": None,
     "mangle_dupe_cols": True,
-    "infer_datetime_format": False,
     "skip_blank_lines": True,
     "encoding_errors": "strict",
     "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index c1698c68ce465..6ed73bd1de1e8 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -262,11 +262,6 @@
     :ref:`io.csv.mixed_timezones` for more.
 
     Note: A fast-path exists for iso8601-formatted dates.
-infer_datetime_format : bool, default False
-    If True and `parse_dates` is enabled, pandas will attempt to infer the
-    format of the datetime strings in the columns, and if it can be inferred,
-    switch to a faster method of parsing them. In some cases this can increase
-    the parsing speed by 5-10x.
 keep_date_col : bool, default False
     If True and `parse_dates` specifies combining multiple columns then
     keep the original columns.
@@ -483,7 +478,6 @@
     "decimal",
     "iterator",
     "dayfirst",
-    "infer_datetime_format",
     "verbose",
     "skipinitialspace",
     "low_memory",
@@ -648,7 +642,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -709,7 +703,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -770,7 +764,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -831,7 +825,7 @@ def read_csv(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] | None = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -905,7 +899,7 @@ def read_csv(
     skip_blank_lines: bool = True,
     # Datetime Handling
     parse_dates: bool | Sequence[Hashable] | None = None,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: bool | lib.NoDefault = lib.no_default,
     keep_date_col: bool = False,
     date_parser=None,
     dayfirst: bool = False,
@@ -940,6 +934,15 @@ def read_csv(
     storage_options: StorageOptions = None,
     use_nullable_dtypes: bool = False,
 ) -> DataFrame | TextFileReader:
+    if infer_datetime_format is not lib.no_default:
+        warnings.warn(
+            "The argument 'infer_datetime_format' is deprecated and will "
+            "be removed in a future version. "
+            "A strict version of it is now the default, see "
+            "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
+            "You can safely remove this argument.",
+            stacklevel=find_stack_level(),
+        )
     # locals() should never be modified
     kwds = locals().copy()
     del kwds["filepath_or_buffer"]
@@ -992,7 +995,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1053,7 +1056,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1114,7 +1117,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1175,7 +1178,7 @@ def read_table(
     verbose: bool = ...,
     skip_blank_lines: bool = ...,
     parse_dates: bool | Sequence[Hashable] = ...,
-    infer_datetime_format: bool = ...,
+    infer_datetime_format: bool | lib.NoDefault = ...,
     keep_date_col: bool = ...,
     date_parser=...,
     dayfirst: bool = ...,
@@ -1249,7 +1252,7 @@ def read_table(
     skip_blank_lines: bool = True,
     # Datetime Handling
     parse_dates: bool | Sequence[Hashable] = False,
-    infer_datetime_format: bool = False,
+    infer_datetime_format: bool | lib.NoDefault = lib.no_default,
     keep_date_col: bool = False,
     date_parser=None,
     dayfirst: bool = False,
@@ -1883,10 +1886,6 @@ def TextParser(*args, **kwds) -> TextFileReader:
         Encoding to use for UTF when reading/writing (ex. 'utf-8')
     squeeze : bool, default False
         returns Series if only one column.
-    infer_datetime_format: bool, default False
-        If True and `parse_dates` is True for a column, try to infer the
-        datetime format based on the first datetime string. If the format
-        can be inferred, there often will be a large parsing speed-up.
     float_precision : str, optional
         Specifies which converter the C engine should use for floating-point
         values. The options are `None` or `high` for the ordinary converter,

From ab78002b251961f393a76f542e23582d72a6d309 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 08:43:02 +0100
Subject: [PATCH 02/12] :rotating_light: add warning about dayfirst

---
 pandas/_libs/tslibs/parsing.pyx | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 5c93edfee79f2..74de3502b73de 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1088,6 +1088,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
     # rebuild string, capturing any inferred padding
     dt_str = ''.join(tokens)
     if parsed_datetime.strftime(guessed_format) == dt_str:
+        _maybe_warn_about_dayfirst(guessed_format, dayfirst)
         return guessed_format
     else:
         return None
@@ -1106,6 +1107,26 @@ cdef str _fill_token(token: str, padding: int):
         token_filled = f'{seconds}.{nanoseconds}'
     return token_filled
 
+cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
+    """Warn if guessed datetime format doesn't respect dayfirst argument."""
+    cdef:
+        int day_index = format.find('%d')
+        int month_index = format.find('%m')
+
+    if (day_index != -1) and (month_index != -1):
+        if (day_index > month_index) and dayfirst:
+            warnings.warn(
+                f"Parsing dates in {format} format when dayfirst=True was specified. "
+                f"Pass `dayfirst=False` or specify a format to silence this warning.",
+                stacklevel=find_stack_level(),
+            )
+        if (day_index < month_index) and not dayfirst:
+            warnings.warn(
+                f"Parsing dates in {format} format when dayfirst=False was specified. "
+                f"Pass `dayfirst=True` or specify a format to silence this warning.",
+                stacklevel=find_stack_level(),
+            )
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers):

From d1cdfd29076044beee8d2dac22dd0a5af7678129 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 08:48:37 +0100
Subject: [PATCH 03/12] :white_check_mark: add/update tests

---
 pandas/tests/apply/test_frame_apply.py        |   3 +-
 pandas/tests/frame/methods/test_drop.py       |  10 +-
 pandas/tests/frame/methods/test_to_csv.py     |  10 +-
 .../indexes/datetimes/test_constructors.py    |  12 +-
 pandas/tests/indexes/test_base.py             |  12 +-
 .../io/parser/common/test_common_basic.py     |   4 +-
 pandas/tests/io/parser/test_parse_dates.py    | 101 ++++--------
 .../io/parser/usecols/test_parse_dates.py     |   8 +-
 pandas/tests/io/test_sql.py                   |   4 +-
 pandas/tests/io/xml/test_xml_dtypes.py        |   2 +-
 pandas/tests/plotting/test_converter.py       |   4 +-
 pandas/tests/series/methods/test_to_csv.py    |   6 +-
 pandas/tests/tools/test_to_datetime.py        | 148 +++++++-----------
 pandas/tests/tslibs/test_parsing.py           |  18 +--
 14 files changed, 138 insertions(+), 204 deletions(-)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 3bcb7d964fad1..28a9871b76985 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -836,7 +836,8 @@ def test_with_dictlike_columns_with_datetime():
     df["author"] = ["X", "Y", "Z"]
     df["publisher"] = ["BBC", "NBC", "N24"]
     df["date"] = pd.to_datetime(
-        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"]
+        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"],
+        dayfirst=True,
     )
     result = df.apply(lambda x: {}, axis=1)
     expected = Series([{}, {}, {}])
diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
index 6e5b97af7c297..1b295fd10c9d5 100644
--- a/pandas/tests/frame/methods/test_drop.py
+++ b/pandas/tests/frame/methods/test_drop.py
@@ -405,11 +405,11 @@ def test_drop_level_nonunique_datetime(self):
         idx = Index([2, 3, 4, 4, 5], name="id")
         idxdt = pd.to_datetime(
             [
-                "201603231400",
-                "201603231500",
-                "201603231600",
-                "201603231600",
-                "201603231700",
+                "2016-03-23 14:00",
+                "2016-03-23 15:00",
+                "2016-03-23 16:00",
+                "2016-03-23 16:00",
+                "2016-03-23 17:00",
             ]
         )
         df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx)
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 1933278efb443..3b4dec8bff7f1 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -27,7 +27,7 @@
 
 class TestDataFrameToCSV:
     def read_csv(self, path, **kwargs):
-        params = {"index_col": 0, "parse_dates": True}
+        params = {"index_col": 0}
         params.update(**kwargs)
 
         return read_csv(path, **params)
@@ -46,17 +46,17 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame):
             # freq does not roundtrip
             datetime_frame.index = datetime_frame.index._with_freq(None)
             datetime_frame.to_csv(path)
-            recons = self.read_csv(path)
+            recons = self.read_csv(path, parse_dates=True)
             tm.assert_frame_equal(datetime_frame, recons)
 
             datetime_frame.to_csv(path, index_label="index")
-            recons = self.read_csv(path, index_col=None)
+            recons = self.read_csv(path, index_col=None, parse_dates=True)
 
             assert len(recons.columns) == len(datetime_frame.columns) + 1
 
             # no index
             datetime_frame.to_csv(path, index=False)
-            recons = self.read_csv(path, index_col=None)
+            recons = self.read_csv(path, index_col=None, parse_dates=True)
             tm.assert_almost_equal(datetime_frame.values, recons.values)
 
             # corner case
@@ -1056,7 +1056,7 @@ def test_to_csv_date_format(self, datetime_frame):
 
             # test NaTs
             nat_index = to_datetime(
-                ["NaT"] * 10 + ["2000-01-01", "1/1/2000", "1-1-2000"]
+                ["NaT"] * 10 + ["2000-01-01", "2000-01-01", "2000-01-01"]
             )
             nat_frame = DataFrame({"A": nat_index}, index=nat_index)
             nat_frame.to_csv(path, date_format="%Y-%m-%d")
diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
index 9914f4357cee4..c1039728f5b5e 100644
--- a/pandas/tests/indexes/datetimes/test_constructors.py
+++ b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -1042,10 +1042,18 @@ def test_datetimeindex_constructor_misc(self):
         arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
         idx4 = DatetimeIndex(arr)
 
-        arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
+        # Can't be parsed consistently, need to parse each element individually
+        arr = [
+            to_datetime(date_string)
+            for date_string in ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
+        ]
         idx5 = DatetimeIndex(arr)
 
-        arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
+        # Can't be parsed consistently, need to parse each element individually
+        arr = [
+            to_datetime(date_string)
+            for date_string in ["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]
+        ]
         idx6 = DatetimeIndex(arr)
 
         idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index ac76953c66a24..512ce164f40f4 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1185,10 +1185,16 @@ def test_equals_op_index_vs_mi_same_length(self):
         expected = np.array([False, False, False])
         tm.assert_numpy_array_equal(result, expected)
 
-    @pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta])
-    def test_dt_conversion_preserves_name(self, dt_conv):
+    @pytest.mark.parametrize(
+        "dt_conv, arg",
+        [
+            (pd.to_datetime, ["2000-01-01", "2000-01-02"]),
+            (pd.to_timedelta, ["01:02:03", "01:02:04"]),
+        ],
+    )
+    def test_dt_conversion_preserves_name(self, dt_conv, arg):
         # GH 10875
-        index = Index(["01:02:03", "01:02:04"], name="label")
+        index = Index(arg, name="label")
         assert index.name == dt_conv(index).name
 
     def test_cached_properties_not_settable(self):
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index 359b059252556..de45b8e9564d0 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -58,8 +58,8 @@ def _set_noconvert_columns(self):
             return CParserWrapper._set_noconvert_columns(self)
 
     data = """a,b,c,d,e
-0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
 
     parse_dates = [[1, 2]]
     cols = {
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 9c8809b6099f9..b8d515a67b7fe 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -1666,9 +1666,9 @@ def test_parse_delimited_date_swap_no_warning(
 @pytest.mark.parametrize(
     "date_string,dayfirst,expected",
     [
-        # %d/%m/%Y; month > 12 thus replacement
+        # %d/%m/%Y; first field (13) > 12, so it cannot be the month
         ("13/02/2019", False, datetime(2019, 2, 13)),
-        # %m/%d/%Y; day > 12 thus there will be no replacement
+        # %m/%d/%Y; second field (13) > 12, so it cannot be the month
         ("02/13/2019", True, datetime(2019, 2, 13)),
     ],
 )
@@ -1677,7 +1677,10 @@ def test_parse_delimited_date_swap_with_warning(
 ):
     parser = all_parsers
     expected = DataFrame({0: [expected]}, dtype="datetime64[ns]")
-    warning_msg = "Specify a format to ensure consistent parsing"
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
+    )
     result = parser.read_csv_check_warnings(
         UserWarning,
         warning_msg,
@@ -1691,13 +1694,11 @@ def test_parse_delimited_date_swap_with_warning(
 
 def test_parse_multiple_delimited_dates_with_swap_warnings():
     # GH46210
-    warning_msg = "Specify a format to ensure consistent parsing"
-    with tm.assert_produces_warning(UserWarning, match=warning_msg) as record:
+    with pytest.raises(
+        ValueError,
+        match=r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\)$",
+    ):
         pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
-    assert len({str(warning.message) for warning in record}) == 1
-    # Using set(record) as repetitions of the same warning are suppressed
-    # https://docs.python.org/3/library/warnings.html
-    # and here we care to check that the warning is only shows once to users.
 
 
 def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
@@ -1860,97 +1861,51 @@ def test_parse_dates_and_keep_orgin_column(all_parsers):
 
 def test_dayfirst_warnings():
     # GH 12585
-    warning_msg_day_first = (
-        r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) was "
-        r"specified. This may lead to inconsistently parsed dates! Specify a format "
-        r"to ensure consistent parsing."
-    )
-    warning_msg_month_first = (
-        "Parsing dates in MM/DD/YYYY format when dayfirst=True was "
-        "specified. This may lead to inconsistently parsed dates! Specify a format "
-        "to ensure consistent parsing."
-    )
 
     # CASE 1: valid input
     input = "date\n31/12/2014\n10/03/2011"
-    expected_consistent = DatetimeIndex(
+    expected = DatetimeIndex(
         ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date"
     )
-    expected_inconsistent = DatetimeIndex(
-        ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date"
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
     )
 
     # A. dayfirst arg correct, no warning
     res1 = read_csv(
         StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
     ).index
-    tm.assert_index_equal(expected_consistent, res1)
+    tm.assert_index_equal(expected, res1)
 
-    # B. dayfirst arg incorrect, warning + incorrect output
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+    # B. dayfirst arg incorrect, warning
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res2 = read_csv(
             StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
         ).index
-    tm.assert_index_equal(expected_inconsistent, res2)
-
-    # C. dayfirst default arg, same as B
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res3 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
-        ).index
-    tm.assert_index_equal(expected_inconsistent, res3)
-
-    # D. infer_datetime_format=True overrides dayfirst default
-    # no warning + correct result
-    res4 = read_csv(
-        StringIO(input),
-        parse_dates=["date"],
-        infer_datetime_format=True,
-        index_col="date",
-    ).index
-    tm.assert_index_equal(expected_consistent, res4)
+    tm.assert_index_equal(expected, res2)
 
     # CASE 2: invalid input
     # cannot consistently process with single format
-    # warnings *always* raised
+    # return to user unaltered
 
     # first in DD/MM/YYYY, second in MM/DD/YYYY
     input = "date\n31/12/2014\n03/30/2011"
-    expected = DatetimeIndex(
-        ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date"
-    )
+    expected = Index(["31/12/2014", "03/30/2011"], dtype="object", name="date")
 
     # A. use dayfirst=True
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
-        res5 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
-        ).index
+    res5 = read_csv(
+        StringIO(input), parse_dates=["date"], dayfirst=True, index_col="date"
+    ).index
     tm.assert_index_equal(expected, res5)
 
     # B. use dayfirst=False
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res6 = read_csv(
             StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
         ).index
     tm.assert_index_equal(expected, res6)
 
-    # C. use dayfirst default arg, same as B
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res7 = read_csv(
-            StringIO(input), parse_dates=["date"], dayfirst=False, index_col="date"
-        ).index
-    tm.assert_index_equal(expected, res7)
-
-    # D. use infer_datetime_format=True
-    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-        res8 = read_csv(
-            StringIO(input),
-            parse_dates=["date"],
-            infer_datetime_format=True,
-            index_col="date",
-        ).index
-    tm.assert_index_equal(expected, res8)
-
 
 @pytest.mark.parametrize(
     "date_string, dayfirst",
@@ -1973,9 +1928,11 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst):
     expected = DatetimeIndex(
         ["2014-01-31"], dtype="datetime64[ns]", freq=None, name="date"
     )
-    with tm.assert_produces_warning(
-        UserWarning, match=r"may lead to inconsistently parsed dates"
-    ):
+    warning_msg = (
+        "Parsing dates in .* format when dayfirst=.* was specified. "
+        "Pass `dayfirst=.*` or specify a format to silence this warning."
+    )
+    with tm.assert_produces_warning(UserWarning, match=warning_msg):
         res = read_csv(
             StringIO(initial_value),
             parse_dates=["date"],
diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py
index 50000dab8a7aa..6d40435a4107e 100644
--- a/pandas/tests/io/parser/usecols/test_parse_dates.py
+++ b/pandas/tests/io/parser/usecols/test_parse_dates.py
@@ -31,8 +31,8 @@
 def test_usecols_with_parse_dates(all_parsers, usecols):
     # see gh-9755
     data = """a,b,c,d,e
-0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
     parser = all_parsers
     parse_dates = [[1, 2]]
 
@@ -138,8 +138,8 @@ def test_usecols_with_parse_dates4(all_parsers):
 )
 def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
     # see gh-9755
-    s = """0,1,20140101,0900,4
-0,1,20140102,1000,4"""
+    s = """0,1,2014-01-01,09:00,4
+0,1,2014-01-02,10:00,4"""
     parse_dates = [[1, 2]]
     parser = all_parsers
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 9adada8afb2c2..129d6f89fd019 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -1386,7 +1386,7 @@ def test_sqlalchemy_type_mapping(self):
 
         # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
         df = DataFrame(
-            {"time": to_datetime(["201412120154", "201412110254"], utc=True)}
+            {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
         )
         db = sql.SQLDatabase(self.conn)
         table = sql.SQLTable("test_type", db, frame=df)
@@ -1595,7 +1595,7 @@ def test_sqlite_type_mapping(self):
 
         # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
         df = DataFrame(
-            {"time": to_datetime(["201412120154", "201412110254"], utc=True)}
+            {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
         )
         db = sql.SQLiteDatabase(self.conn)
         table = sql.SQLiteTable("test_type", db, frame=df)
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index 5629830767c3c..7b2ffbc7cda5e 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -457,7 +457,7 @@ def test_day_first_parse_dates(parser):
     )
 
     with tm.assert_produces_warning(
-        UserWarning, match="Parsing dates in DD/MM/YYYY format"
+        UserWarning, match="Parsing dates in %d/%m/%Y format"
     ):
         df_result = read_xml(xml, parse_dates=["date"], parser=parser)
         df_iter = read_xml_iterparse(
diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
index 9a6fed1afad1f..87d5aaf0c3205 100644
--- a/pandas/tests/plotting/test_converter.py
+++ b/pandas/tests/plotting/test_converter.py
@@ -161,8 +161,8 @@ def dtc(self):
         return converter.DatetimeConverter()
 
     def test_convert_accepts_unicode(self, dtc):
-        r1 = dtc.convert("12:22", None, None)
-        r2 = dtc.convert("12:22", None, None)
+        r1 = dtc.convert("2000-01-01 12:22", None, None)
+        r2 = dtc.convert("2000-01-01 12:22", None, None)
         assert r1 == r2, "DatetimeConverter.convert should accept unicode"
 
     def test_conversion(self, dtc):
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
index 28519fc9b529f..7827483644634 100644
--- a/pandas/tests/series/methods/test_to_csv.py
+++ b/pandas/tests/series/methods/test_to_csv.py
@@ -13,7 +13,7 @@
 
 class TestSeriesToCSV:
     def read_csv(self, path, **kwargs):
-        params = {"index_col": 0, "header": None, "parse_dates": True}
+        params = {"index_col": 0, "header": None}
         params.update(**kwargs)
 
         header = params.get("header")
@@ -30,7 +30,7 @@ def test_from_csv(self, datetime_series, string_series):
 
         with tm.ensure_clean() as path:
             datetime_series.to_csv(path, header=False)
-            ts = self.read_csv(path)
+            ts = self.read_csv(path, parse_dates=True)
             tm.assert_series_equal(datetime_series, ts, check_names=False)
 
             assert ts.name is None
@@ -55,7 +55,7 @@ def test_from_csv(self, datetime_series, string_series):
             with open(path, "w") as outfile:
                 outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
 
-            series = self.read_csv(path, sep="|")
+            series = self.read_csv(path, sep="|", parse_dates=True)
             check_series = Series(
                 {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
             )
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index f524bc18793d8..286036440073f 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -219,7 +219,6 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
             ),
             (["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
             (["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
-            (["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])),
             ([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])),
             ([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])),
         ],
@@ -463,14 +462,14 @@ def test_to_datetime_parse_timezone_keeps_name(self):
 class TestToDatetime:
     def test_to_datetime_mixed_datetime_and_string(self):
         # GH#47018 adapted old doctest with new behavior
-        d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
         d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
-        res = to_datetime(["2020-01-01 17:00 -0100", d2])
-        expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60))
-        tm.assert_index_equal(res, expected)
+        with pytest.raises(
+            ValueError,
+            match=r"time data '.*' does not match format '%Y-%m-%d %H:%M %z' \(match\)",
+        ):
+            to_datetime(["2020-01-01 17:00 -0100", d2])
 
-    @pytest.mark.parametrize("infer_datetime_format", [True, False])
-    def test_to_datetime_np_str(self, infer_datetime_format):
+    def test_to_datetime_np_str(self):
         # GH#32264
         # GH#48969
         value = np.str_("2019-02-04 10:18:46.297000+0000")
@@ -482,11 +481,11 @@ def test_to_datetime_np_str(self, infer_datetime_format):
         assert to_datetime(value) == exp
         assert to_datetime(ser.iloc[0]) == exp
 
-        res = to_datetime([value], infer_datetime_format=infer_datetime_format)
+        res = to_datetime([value])
         expected = Index([exp])
         tm.assert_index_equal(res, expected)
 
-        res = to_datetime(ser, infer_datetime_format=infer_datetime_format)
+        res = to_datetime(ser)
         expected = Series(expected)
         tm.assert_series_equal(res, expected)
 
@@ -927,7 +926,10 @@ def test_datetime_bool_arrays_mixed(self, cache):
         msg = f"{type(cache)} is not convertible to datetime"
         with pytest.raises(TypeError, match=msg):
             to_datetime([False, datetime.today()], cache=cache)
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(
+            ValueError,
+            match=r"^time data 'True' does not match format '%Y%m%d' \(match\)$",
+        ):
             to_datetime(["20130101", True], cache=cache)
         tm.assert_index_equal(
             to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache),
@@ -1071,8 +1073,7 @@ def test_to_datetime_cache_scalar(self):
                 (None,)
                 + (NaT,) * start_caching_at
                 + ("2012 July 26", Timestamp("2012-07-26")),
-                (NaT,) * (start_caching_at + 1)
-                + (Timestamp("2012-07-26"), Timestamp("2012-07-26")),
+                (NaT,) * (start_caching_at + 1) + (Timestamp("2012-07-26"), NaT),
             ),
         ),
     )
@@ -1153,7 +1154,6 @@ def test_to_datetime_coerce(self):
         )
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize("infer_datetime_format", [True, False])
     @pytest.mark.parametrize(
         "errors, expected",
         [
@@ -1224,15 +1224,18 @@ def test_iso_8601_strings_with_different_offsets_utc(self):
 
     def test_iso8601_strings_mixed_offsets_with_naive(self):
         # GH 24992
-        result = to_datetime(
+        # Can't parse consistently, need to parse each element in loop.
+        result = DatetimeIndex(
             [
-                "2018-11-28T00:00:00",
-                "2018-11-28T00:00:00+12:00",
-                "2018-11-28T00:00:00",
-                "2018-11-28T00:00:00+06:00",
-                "2018-11-28T00:00:00",
-            ],
-            utc=True,
+                to_datetime(string, utc=True)
+                for string in [
+                    "2018-11-28T00:00:00",
+                    "2018-11-28T00:00:00+12:00",
+                    "2018-11-28T00:00:00",
+                    "2018-11-28T00:00:00+06:00",
+                    "2018-11-28T00:00:00",
+                ]
+            ]
         )
         expected = to_datetime(
             [
@@ -1248,9 +1251,10 @@ def test_iso8601_strings_mixed_offsets_with_naive(self):
 
     def test_iso8601_strings_mixed_offsets_with_naive_reversed(self):
         items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"]
-        result = to_datetime(items, utc=True)
-        expected = to_datetime(list(reversed(items)), utc=True)[::-1]
-        tm.assert_index_equal(result, expected)
+        # Can't parse consistently, need to parse each element in loop.
+        result = [to_datetime(item, utc=True) for item in items]
+        expected = [to_datetime(item, utc=True) for item in list(reversed(items))][::-1]
+        assert result == expected
 
     def test_mixed_offsets_with_native_datetime_raises(self):
         # GH 25978
@@ -1778,7 +1782,7 @@ def test_to_datetime_on_datetime64_series(self, cache):
     def test_to_datetime_with_space_in_series(self, cache):
         # GH 6428
         ser = Series(["10/18/2006", "10/18/2008", " "])
-        msg = r"(\(')?String does not contain a date(:', ' '\))?"
+        msg = r"^time data ' ' does not match format '%m/%d/%Y' \(match\)$"
         with pytest.raises(ValueError, match=msg):
             to_datetime(ser, errors="raise", cache=cache)
         result_coerce = to_datetime(ser, errors="coerce", cache=cache)
@@ -1838,7 +1842,7 @@ def test_to_datetime_strings(self, cache):
 
     def test_to_datetime_strings_variation(self, cache):
         array = ["2012", "20120101", "20120101 12:01:01"]
-        expected = list(to_datetime(array, cache=cache))
+        expected = [to_datetime(dt_str, cache=cache) for dt_str in array]
         result = [Timestamp(date_str) for date_str in array]
         tm.assert_almost_equal(result, expected)
 
@@ -1908,7 +1912,10 @@ def test_string_na_nat_conversion(self, cache):
         result = tslib.array_to_datetime(strings)[0]
         tm.assert_almost_equal(result, expected)
 
-        result2 = to_datetime(strings, cache=cache)
+        # Can't parse in consistent format, so need to convert each individually.
+        result2 = DatetimeIndex(
+            [to_datetime(string, cache=cache) for string in strings]
+        )
         assert isinstance(result2, DatetimeIndex)
         tm.assert_numpy_array_equal(result, result2.values)
 
@@ -2011,80 +2018,39 @@ def test_dayfirst(self, cache):
 
     def test_dayfirst_warnings_valid_input(self):
         # GH 12585
-        warning_msg_day_first = (
-            r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) "
-            "was specified. This may lead to inconsistently parsed dates! Specify a "
-            "format to ensure consistent parsing."
+        warning_msg = (
+            "Parsing dates in .* format when dayfirst=.* was specified. "
+            "Pass `dayfirst=.*` or specify a format to silence this warning."
         )
 
         # CASE 1: valid input
         arr = ["31/12/2014", "10/03/2011"]
-        expected_consistent = DatetimeIndex(
+        expected = DatetimeIndex(
             ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None
         )
-        expected_inconsistent = DatetimeIndex(
-            ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None
-        )
 
         # A. dayfirst arg correct, no warning
         res1 = to_datetime(arr, dayfirst=True)
-        tm.assert_index_equal(expected_consistent, res1)
+        tm.assert_index_equal(expected, res1)
 
-        # B. dayfirst arg incorrect, warning + incorrect output
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
+        # B. dayfirst arg incorrect, warning
+        with tm.assert_produces_warning(UserWarning, match=warning_msg):
             res2 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected_inconsistent, res2)
-
-        # C. dayfirst default arg, same as B
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res3 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected_inconsistent, res3)
-
-        # D. infer_datetime_format=True overrides dayfirst default
-        # no warning + correct result
-        res4 = to_datetime(arr, infer_datetime_format=True)
-        tm.assert_index_equal(expected_consistent, res4)
+        tm.assert_index_equal(expected, res2)
 
     def test_dayfirst_warnings_invalid_input(self):
         # CASE 2: invalid input
         # cannot consistently process with single format
-        # warnings *always* raised
-        warning_msg_day_first = (
-            r"Parsing dates in DD/MM/YYYY format when dayfirst=False \(the default\) "
-            "was specified. This may lead to inconsistently parsed dates! Specify a "
-            "format to ensure consistent parsing."
-        )
-        warning_msg_month_first = (
-            r"Parsing dates in MM/DD/YYYY format when dayfirst=True "
-            "was specified. This may lead to inconsistently parsed dates! Specify a "
-            "format to ensure consistent parsing."
-        )
+        # ValueError *always* raised
 
-        arr = ["31/12/2014", "03/30/2011"]
         # first in DD/MM/YYYY, second in MM/DD/YYYY
-        expected = DatetimeIndex(
-            ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None
-        )
-
-        # A. use dayfirst=True
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
-            res5 = to_datetime(arr, dayfirst=True)
-        tm.assert_index_equal(expected, res5)
-
-        # B. use dayfirst=False
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res6 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected, res6)
-
-        # C. use dayfirst default arg, same as B
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res7 = to_datetime(arr, dayfirst=False)
-        tm.assert_index_equal(expected, res7)
+        arr = ["31/12/2014", "03/30/2011"]
 
-        # D. use infer_datetime_format=True
-        with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
-            res8 = to_datetime(arr, infer_datetime_format=True)
-        tm.assert_index_equal(expected, res8)
+        with pytest.raises(
+            ValueError,
+            match=r"time data '03/30/2011' does not match format '%d/%m/%Y' \(match\)$",
+        ):
+            to_datetime(arr, dayfirst=True)
 
     @pytest.mark.parametrize("klass", [DatetimeIndex, DatetimeArray])
     def test_to_datetime_dta_tz(self, klass):
@@ -2139,12 +2105,8 @@ def test_to_datetime_infer_datetime_format_consistent_format(
         s_as_dt_strings = ser.apply(lambda x: x.strftime(test_format))
 
         with_format = to_datetime(s_as_dt_strings, format=test_format, cache=cache)
-        no_infer = to_datetime(
-            s_as_dt_strings, infer_datetime_format=False, cache=cache
-        )
-        yes_infer = to_datetime(
-            s_as_dt_strings, infer_datetime_format=True, cache=cache
-        )
+        no_infer = to_datetime(s_as_dt_strings, cache=cache)
+        yes_infer = to_datetime(s_as_dt_strings, cache=cache)
 
         # Whether the format is explicitly passed, it is inferred, or
         # it is not inferred, the results should all be the same
@@ -2223,7 +2185,7 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset):
     def test_infer_datetime_format_zero_tz(self, ts, zero_tz):
         # GH 41047
         ser = Series([ts + zero_tz])
-        result = to_datetime(ser, infer_datetime_format=True)
+        result = to_datetime(ser)
         tz = pytz.utc if zero_tz == "Z" else None
         expected = Series([Timestamp(ts, tz=tz)])
         tm.assert_series_equal(result, expected)
@@ -2782,9 +2744,9 @@ def test_empty_string_datetime_coerce_format():
     with pytest.raises(ValueError, match="does not match format"):
         to_datetime(td, format=format, errors="raise")
 
-    # don't raise an exception in case no format is given
-    result = to_datetime(td, errors="raise")
-    tm.assert_series_equal(result, expected)
+    # still raise an exception in case no format is given
+    with pytest.raises(ValueError, match="does not match format"):
+        to_datetime(td, errors="raise")
 
 
 def test_empty_string_datetime_coerce__unit():
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index a4e12315d34e0..49d83a8fa5c56 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -235,19 +235,19 @@ def test_guess_datetime_format_wrong_type_inputs(invalid_type_dt):
 
 
 @pytest.mark.parametrize(
-    "string,fmt",
+    "string,fmt,dayfirst",
     [
-        ("2011-1-1", "%Y-%m-%d"),
-        ("1/1/2011", "%m/%d/%Y"),
-        ("30-1-2011", "%d-%m-%Y"),
-        ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S"),
-        ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S"),
-        ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S"),
+        ("2011-1-1", "%Y-%m-%d", False),
+        ("1/1/2011", "%m/%d/%Y", False),
+        ("30-1-2011", "%d-%m-%Y", True),
+        ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S", False),
+        ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S", False),
+        ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S", False),
     ],
 )
-def test_guess_datetime_format_no_padding(string, fmt):
+def test_guess_datetime_format_no_padding(string, fmt, dayfirst):
     # see gh-11142
-    result = parsing.guess_datetime_format(string)
+    result = parsing.guess_datetime_format(string, dayfirst=dayfirst)
     assert result == fmt
 
 

From 632ea9d73fb67c27d98d97c080f7c5e6c35c9919 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 09:16:00 +0100
Subject: [PATCH 04/12] :rotating_light: add warning if format can't be guessed

---
 pandas/core/tools/datetimes.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 5760952ba7324..09729c2aab22c 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -132,7 +132,16 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str
     if (first_non_null := tslib.first_non_null(arr)) != -1:
         if type(first_non_nan_element := arr[first_non_null]) is str:
             # GH#32264 np.str_ object
-            return guess_datetime_format(first_non_nan_element, dayfirst=dayfirst)
+            guessed_format = guess_datetime_format(
+                first_non_nan_element, dayfirst=dayfirst
+            )
+            if guessed_format is not None:
+                return guessed_format
+            warnings.warn(
+                "Could not infer format - "
+                "to ensure consistent parsing, specify a format.",
+                stacklevel=find_stack_level(),
+            )
     return None
 
 

From dadb44b9e47e64db09b443d5388550cfddfc981a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 09:22:00 +0100
Subject: [PATCH 05/12] :goal_net: catch warnings

---
 pandas/core/tools/datetimes.py                |   2 +-
 pandas/tests/frame/methods/test_to_csv.py     |   5 +-
 pandas/tests/groupby/test_function.py         |   3 +-
 .../tests/groupby/transform/test_transform.py |   3 +-
 pandas/tests/io/excel/test_readers.py         |  13 +-
 pandas/tests/io/parser/test_parse_dates.py    |  98 ++++-
 .../io/parser/usecols/test_parse_dates.py     |   8 +-
 pandas/tests/test_algos.py                    |   3 +-
 pandas/tests/tools/test_to_datetime.py        | 391 +++++++++---------
 9 files changed, 309 insertions(+), 217 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 09729c2aab22c..41feb153978d4 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -1002,7 +1002,7 @@ def to_datetime(
       are constant:
 
     >>> from datetime import datetime
-    >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
+    >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)])
     DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
                   dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
 
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 3b4dec8bff7f1..3985bd40daac5 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -514,7 +514,10 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame):
             tsframe.index = MultiIndex.from_arrays(new_index)
 
             tsframe.to_csv(path, index_label=["time", "foo"])
-            recons = self.read_csv(path, index_col=[0, 1])
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                recons = self.read_csv(path, index_col=[0, 1], parse_dates=True)
 
             # TODO to_csv drops column name
             tm.assert_frame_equal(tsframe, recons, check_names=False)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index cdbb121819c5e..ed63d41a74ae6 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -717,7 +717,8 @@ def test_max_nan_bug():
 -05-06,2013-05-06 00:00:00,,log.log
 -05-07,2013-05-07 00:00:00,OE,xlsx"""
 
-    df = pd.read_csv(StringIO(raw), parse_dates=[0])
+    with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+        df = pd.read_csv(StringIO(raw), parse_dates=[0])
     gb = df.groupby("Date")
     r = gb[["File"]].max()
     e = gb["File"].max().to_frame()
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 8a2bd64a3deb0..d52de4d0658ef 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1070,7 +1070,8 @@ def demean_rename(x):
 @pytest.mark.parametrize("func", [min, max, np.min, np.max, "first", "last"])
 def test_groupby_transform_timezone_column(func):
     # GH 24198
-    ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
+    with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+        ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
     result = DataFrame({"end_time": [ts], "id": [1]})
     result["max_end_time"] = result.groupby("id").end_time.transform(func)
     expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"])
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index fa1d6bbfd5a7e..8f937ad6b401a 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -888,11 +888,18 @@ def test_reader_seconds(self, request, engine, read_ext):
                 ]
             }
         )
-
-        actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
+        if engine == "odf":
+            # odf recognises cell type as time (from its attribute)
+            # so tries to parse it.
+            warning = UserWarning
+        else:
+            warning = None
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
-        actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
         tm.assert_frame_equal(actual, expected)
 
     def test_read_excel_multiindex(self, request, read_ext):
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index b8d515a67b7fe..c3feb03936686 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -826,7 +826,13 @@ def test_yy_format_with_year_first(all_parsers, parse_dates):
 090331,0830,5,6
 """
     parser = all_parsers
-    result = parser.read_csv(StringIO(data), index_col=0, parse_dates=parse_dates)
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        index_col=0,
+        parse_dates=parse_dates,
+    )
     index = DatetimeIndex(
         [
             datetime(2009, 1, 31, 0, 10, 0),
@@ -899,7 +905,13 @@ def test_multi_index_parse_dates(all_parsers, index_col):
         columns=["A", "B", "C"],
         index=index,
     )
-    result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True)
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        index_col=index_col,
+        parse_dates=True,
+    )
     tm.assert_frame_equal(result, expected)
 
 
@@ -1232,19 +1244,55 @@ def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates):
 
 
 @pytest.mark.parametrize("cache_dates", [True, False])
-@pytest.mark.parametrize("value", ["nan", "0", ""])
+@pytest.mark.parametrize("value", ["nan", ""])
 def test_bad_date_parse(all_parsers, cache_dates, value):
     # if we have an invalid date make sure that we handle this with
     # and w/o the cache properly
     parser = all_parsers
     s = StringIO((f"{value},\n") * 50000)
 
-    parser.read_csv(
+    if parser.engine == "pyarrow":
+        # None in input gets converted to 'None', for which
+        # pandas tries to guess the datetime format, triggering
+        # the warning. TODO: parse dates directly in pyarrow, see
+        # https://github.com/pandas-dev/pandas/issues/48017
+        warn = UserWarning
+    else:
+        warn = None
+    parser.read_csv_check_warnings(
+        warn,
+        "Could not infer format",
+        s,
+        header=None,
+        names=["foo", "bar"],
+        parse_dates=["foo"],
+        cache_dates=cache_dates,
+    )
+
+
+@pytest.mark.parametrize("cache_dates", [True, False])
+@pytest.mark.parametrize("value", ["0"])
+def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
+    # if we have an invalid date, make sure that we handle it properly
+    # both with and without the cache.
+    parser = all_parsers
+    s = StringIO((f"{value},\n") * 50000)
+
+    if parser.engine == "pyarrow":
+        # pyarrow reads "0" as 0 (of type int64), and so
+        # pandas doesn't try to guess the datetime format
+        # TODO: parse dates directly in pyarrow, see
+        # https://github.com/pandas-dev/pandas/issues/48017
+        warn = None
+    else:
+        warn = UserWarning
+    parser.read_csv_check_warnings(
+        warn,
+        "Could not infer format",
         s,
         header=None,
         names=["foo", "bar"],
         parse_dates=["foo"],
-        infer_datetime_format=False,
         cache_dates=cache_dates,
     )
 
@@ -1262,6 +1310,19 @@ def test_parse_dates_empty_string(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+def test_parse_dates_infer_datetime_format_warning(all_parsers):
+    # GH 49024
+    parser = all_parsers
+    data = "Date,test\n2012-01-01,1\n,2"
+    parser.read_csv_check_warnings(
+        UserWarning,
+        "The argument 'infer_datetime_format' is deprecated",
+        StringIO(data),
+        parse_dates=["Date"],
+        infer_datetime_format=True,
+    )
+
+
 @xfail_pyarrow
 @pytest.mark.parametrize(
     "data,kwargs,expected",
@@ -1635,7 +1696,13 @@ def test_parse_timezone(all_parsers):
 def test_invalid_parse_delimited_date(all_parsers, date_string):
     parser = all_parsers
     expected = DataFrame({0: [date_string]}, dtype="object")
-    result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0])
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(date_string),
+        header=None,
+        parse_dates=[0],
+    )
     tm.assert_frame_equal(result, expected)
 
 
@@ -1786,7 +1853,13 @@ def test_date_parser_and_names(all_parsers):
     # GH#33699
     parser = all_parsers
     data = StringIO("""x,y\n1,2""")
-    result = parser.read_csv(data, parse_dates=["B"], names=["B"])
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        data,
+        parse_dates=["B"],
+        names=["B"],
+    )
     expected = DataFrame({"B": ["y", "2"]}, index=["x", "1"])
     tm.assert_frame_equal(result, expected)
 
@@ -1833,7 +1906,9 @@ def test_date_parser_usecols_thousands(all_parsers):
     """
 
     parser = all_parsers
-    result = parser.read_csv(
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
         StringIO(data),
         parse_dates=[1],
         usecols=[1, 2],
@@ -1947,7 +2022,12 @@ def test_infer_first_column_as_index(all_parsers):
     # GH#11019
     parser = all_parsers
     data = "a,b,c\n1970-01-01,2,3,4"
-    result = parser.read_csv(StringIO(data), parse_dates=["a"])
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        parse_dates=["a"],
+    )
     expected = DataFrame({"a": "2", "b": 3, "c": 4}, index=["1970-01-01"])
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py
index 6d40435a4107e..4823df1da9959 100644
--- a/pandas/tests/io/parser/usecols/test_parse_dates.py
+++ b/pandas/tests/io/parser/usecols/test_parse_dates.py
@@ -124,7 +124,13 @@ def test_usecols_with_parse_dates4(all_parsers):
     }
     expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))
 
-    result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
+    result = parser.read_csv_check_warnings(
+        UserWarning,
+        "Could not infer format",
+        StringIO(data),
+        usecols=usecols,
+        parse_dates=parse_dates,
+    )
     tm.assert_frame_equal(result, expected)
 
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 80271c13cd35d..b3f0f40be2d78 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1212,7 +1212,8 @@ def test_value_counts_datetime_outofbounds(self):
         tm.assert_series_equal(res, exp)
 
         # GH 12424
-        res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
         exp = Series(["2362-01-01", np.nan], dtype=object)
         tm.assert_series_equal(res, exp)
 
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 286036440073f..a2871e79dc7d9 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -228,6 +228,13 @@ def test_to_datetime_with_NA(self, data, format, expected):
         result = to_datetime(data, format=format)
         tm.assert_index_equal(result, expected)
 
+    def test_to_datetime_with_NA_with_warning(self):
+        # GH#42957
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result = to_datetime(["201010", pd.NA])
+        expected = DatetimeIndex(["2010-10-20", "NaT"])
+        tm.assert_index_equal(result, expected)
+
     def test_to_datetime_format_integer(self, cache):
         # GH 10178
         ser = Series([2000, 2001, 2002])
@@ -345,7 +352,6 @@ def test_to_datetime_with_non_exact(self, cache):
         ],
     )
     def test_parse_nanoseconds_with_formula(self, cache, arg):
-
         # GH8989
         # truncating the nanoseconds when a format was provided
         expected = to_datetime(arg, cache=cache)
@@ -619,15 +625,16 @@ def test_to_datetime_YYYYMMDD(self):
     def test_to_datetime_unparsable_ignore(self):
         # unparsable
         ser = "Month 1, 1999"
-        assert to_datetime(ser, errors="ignore") == ser
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            assert to_datetime(ser, errors="ignore") == ser
 
     @td.skip_if_windows  # `tm.set_timezone` does not work in windows
     def test_to_datetime_now(self):
         # See GH#18666
         with tm.set_timezone("US/Eastern"):
-            msg = "The parsing of 'now' in pd.to_datetime"
+            msg = "The parsing of 'now' in pd.to_datetime|Could not infer format"
             with tm.assert_produces_warning(
-                FutureWarning, match=msg, check_stacklevel=False
+                (FutureWarning, UserWarning), match=msg, check_stacklevel=False
             ):
                 # checking stacklevel is tricky because we go through cython code
                 # GH#18705
@@ -654,8 +661,11 @@ def test_to_datetime_today(self, tz):
         # so this test will not detect the regression introduced in #18666.
         with tm.set_timezone(tz):
             nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
-            pdtoday = to_datetime("today")
-            pdtoday2 = to_datetime(["today"])[0]
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                pdtoday = to_datetime("today")
+                pdtoday2 = to_datetime(["today"])[0]
 
             tstoday = Timestamp("today")
             tstoday2 = Timestamp.today()
@@ -672,8 +682,8 @@ def test_to_datetime_today(self, tz):
 
     @pytest.mark.parametrize("arg", ["now", "today"])
     def test_to_datetime_today_now_unicode_bytes(self, arg):
-        warn = FutureWarning if arg == "now" else None
-        msg = "The parsing of 'now' in pd.to_datetime"
+        warn = (FutureWarning, UserWarning) if arg == "now" else UserWarning
+        msg = "The parsing of 'now' in pd.to_datetime|Could not infer format"
         with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
             # checking stacklevel is tricky because we go through cython code
             # GH#18705
@@ -946,18 +956,17 @@ def test_datetime_invalid_datatype(self, arg):
             to_datetime(arg)
 
     @pytest.mark.parametrize("value", ["a", "00:01:99"])
-    @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_invalid_scalar(self, value, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_invalid_scalar(self, value, format, warning):
         # GH24763
-        res = to_datetime(
-            value, errors="ignore", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="ignore", format=format)
         assert res == value
 
-        res = to_datetime(
-            value, errors="coerce", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="coerce", format=format)
         assert res is NaT
 
         msg = (
@@ -966,51 +975,46 @@ def test_datetime_invalid_scalar(self, value, format, infer):
             f"Given date string {value} not likely a datetime"
         )
         with pytest.raises(ValueError, match=msg):
-            to_datetime(
-                value, errors="raise", format=format, infer_datetime_format=infer
-            )
+            with tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime(value, errors="raise", format=format)
 
     @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"])
-    @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_outofbounds_scalar(self, value, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_outofbounds_scalar(self, value, format, warning):
         # GH24763
-        res = to_datetime(
-            value, errors="ignore", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="ignore", format=format)
         assert res == value
 
-        res = to_datetime(
-            value, errors="coerce", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(value, errors="coerce", format=format)
         assert res is NaT
 
         if format is not None:
             msg = "is a bad directive in format|Out of bounds .* present at position 0"
             with pytest.raises(ValueError, match=msg):
-                to_datetime(
-                    value, errors="raise", format=format, infer_datetime_format=infer
-                )
+                to_datetime(value, errors="raise", format=format)
         else:
             msg = "Out of bounds .* present at position 0"
-            with pytest.raises(OutOfBoundsDatetime, match=msg):
-                to_datetime(
-                    value, errors="raise", format=format, infer_datetime_format=infer
-                )
+            with pytest.raises(
+                OutOfBoundsDatetime, match=msg
+            ), tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime(value, errors="raise", format=format)
 
     @pytest.mark.parametrize("values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]])
-    @pytest.mark.parametrize("infer", [True, False])
-    @pytest.mark.parametrize("format", [None, "H%:M%:S%"])
-    def test_datetime_invalid_index(self, values, format, infer):
+    @pytest.mark.parametrize(
+        "format,warning", [(None, UserWarning), ("H%:M%:S%", None)]
+    )
+    def test_datetime_invalid_index(self, values, format, warning):
         # GH24763
-        res = to_datetime(
-            values, errors="ignore", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(values, errors="ignore", format=format)
         tm.assert_index_equal(res, Index(values))
 
-        res = to_datetime(
-            values, errors="coerce", format=format, infer_datetime_format=infer
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            res = to_datetime(values, errors="coerce", format=format)
         tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values)))
 
         msg = (
@@ -1019,9 +1023,8 @@ def test_datetime_invalid_index(self, values, format, infer):
             "second must be in 0..59"
         )
         with pytest.raises(ValueError, match=msg):
-            to_datetime(
-                values, errors="raise", format=format, infer_datetime_format=infer
-            )
+            with tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime(values, errors="raise", format=format)
 
     @pytest.mark.parametrize("utc", [True, None])
     @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None])
@@ -1161,28 +1164,28 @@ def test_to_datetime_coerce(self):
             ("ignore", Index(["200622-12-31", "111111-24-11"])),
         ],
     )
-    def test_to_datetime_malformed_no_raise(
-        self, errors, expected, infer_datetime_format
-    ):
+    def test_to_datetime_malformed_no_raise(self, errors, expected):
         # GH 28299
         # GH 48633
         ts_strings = ["200622-12-31", "111111-24-11"]
-        result = to_datetime(
-            ts_strings, errors=errors, infer_datetime_format=infer_datetime_format
-        )
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result = to_datetime(ts_strings, errors=errors)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize("infer_datetime_format", [True, False])
-    def test_to_datetime_malformed_raise(self, infer_datetime_format):
+    def test_to_datetime_malformed_raise(self):
         # GH 48633
         ts_strings = ["200622-12-31", "111111-24-11"]
         with pytest.raises(
             ValueError,
             match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$",
         ):
-            to_datetime(
-                ts_strings, errors="raise", infer_datetime_format=infer_datetime_format
-            )
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(
+                    ts_strings,
+                    errors="raise",
+                )
 
     def test_iso_8601_strings_with_same_offset(self):
         # GH 17697, 11736
@@ -1283,7 +1286,10 @@ def test_mixed_offsets_with_native_datetime_raises(self):
         tm.assert_series_equal(mixed, expected)
 
         with pytest.raises(ValueError, match="Tz-aware datetime.datetime"):
-            to_datetime(mixed)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(mixed)
 
     def test_non_iso_strings_with_tz_offset(self):
         result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2)
@@ -1409,23 +1415,26 @@ def test_unit_with_numeric(self, cache, errors, dtype):
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
-        "exp, arr",
+        "exp, arr, warning",
         [
             [
                 ["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"],
                 ["foo", 1.434692e18, 1.432766e18],
+                UserWarning,
             ],
             [
                 ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"],
                 [1.434692e18, 1.432766e18, "foo", "NaT"],
+                None,
             ],
         ],
     )
-    def test_unit_with_numeric_coerce(self, cache, exp, arr):
+    def test_unit_with_numeric_coerce(self, cache, exp, arr, warning):
         # but we want to make sure that we are coercing
         # if we have ints/strings
         expected = DatetimeIndex(exp)
-        result = to_datetime(arr, errors="coerce", cache=cache)
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result = to_datetime(arr, errors="coerce", cache=cache)
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -1741,7 +1750,10 @@ def test_to_datetime_barely_out_of_bounds(self):
 
         msg = "Out of bounds .* present at position 0"
         with pytest.raises(OutOfBoundsDatetime, match=msg):
-            to_datetime(arr)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(arr)
 
     @pytest.mark.parametrize(
         "arg, exp_str",
@@ -1925,15 +1937,22 @@ def test_string_na_nat_conversion_malformed(self, cache):
         # GH 10636, default is now 'raise'
         msg = r"Unknown string format:|day is out of range for month"
         with pytest.raises(ValueError, match=msg):
-            to_datetime(malformed, errors="raise", cache=cache)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(malformed, errors="raise", cache=cache)
 
-        result = to_datetime(malformed, errors="ignore", cache=cache)
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result = to_datetime(malformed, errors="ignore", cache=cache)
         # GH 21864
         expected = Index(malformed)
         tm.assert_index_equal(result, expected)
 
         with pytest.raises(ValueError, match=msg):
-            to_datetime(malformed, errors="raise", cache=cache)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(malformed, errors="raise", cache=cache)
 
     def test_string_na_nat_conversion_with_name(self, cache):
         idx = ["a", "b", "c", "d", "e"]
@@ -2114,60 +2133,14 @@ def test_to_datetime_infer_datetime_format_consistent_format(
         tm.assert_series_equal(no_infer, yes_infer)
 
     @pytest.mark.parametrize(
-        "data",
-        [
-            ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"],
-            ["Jan/01/2011", "Feb/01/2011", "Mar/01/2011"],
-        ],
+        "tz_name, offset, warning",
+        [("UTC", 0, None), ("UTC-3", 180, UserWarning), ("UTC+3", -180, UserWarning)],
     )
-    def test_to_datetime_infer_datetime_format_inconsistent_format(self, cache, data):
-        ser = Series(np.array(data))
-
-        # When the format is inconsistent, infer_datetime_format should just
-        # fallback to the default parsing
-        tm.assert_series_equal(
-            to_datetime(ser, infer_datetime_format=False, cache=cache),
-            to_datetime(ser, infer_datetime_format=True, cache=cache),
-        )
-
-    def test_to_datetime_infer_datetime_format_series_with_nans(self, cache):
-        ser = Series(
-            np.array(
-                ["01/01/2011 00:00:00", np.nan, "01/03/2011 00:00:00", np.nan],
-                dtype=object,
-            )
-        )
-        tm.assert_series_equal(
-            to_datetime(ser, infer_datetime_format=False, cache=cache),
-            to_datetime(ser, infer_datetime_format=True, cache=cache),
-        )
-
-    def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache):
-        ser = Series(
-            np.array(
-                [
-                    np.nan,
-                    np.nan,
-                    "01/01/2011 00:00:00",
-                    "01/02/2011 00:00:00",
-                    "01/03/2011 00:00:00",
-                ],
-                dtype=object,
-            )
-        )
-
-        tm.assert_series_equal(
-            to_datetime(ser, infer_datetime_format=False, cache=cache),
-            to_datetime(ser, infer_datetime_format=True, cache=cache),
-        )
-
-    @pytest.mark.parametrize(
-        "tz_name, offset", [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)]
-    )
-    def test_infer_datetime_format_tz_name(self, tz_name, offset):
+    def test_infer_datetime_format_tz_name(self, tz_name, offset, warning):
         # GH 33133
         ser = Series([f"2019-02-02 08:07:13 {tz_name}"])
-        result = to_datetime(ser, infer_datetime_format=True)
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result = to_datetime(ser)
         expected = Series(
             [Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))]
         )
@@ -2203,26 +2176,38 @@ def test_to_datetime_iso8601_noleading_0s(self, cache, format):
         )
         tm.assert_series_equal(to_datetime(ser, format=format, cache=cache), expected)
 
+    def test_parse_dates_infer_datetime_format_warning(self):
+        # GH 49024
+        with tm.assert_produces_warning(
+            UserWarning,
+            match="The argument 'infer_datetime_format' is deprecated",
+        ):
+            to_datetime(["10-10-2000"], infer_datetime_format=True)
+
 
 class TestDaysInMonth:
     # tests for issue #10154
 
     @pytest.mark.parametrize(
-        "arg, format",
+        "arg, format, warning",
         [
-            ["2015-02-29", None],
-            ["2015-02-29", "%Y-%m-%d"],
-            ["2015-02-32", "%Y-%m-%d"],
-            ["2015-04-31", "%Y-%m-%d"],
+            ["2015-02-29", None, UserWarning],
+            ["2015-02-29", "%Y-%m-%d", None],
+            ["2015-02-32", "%Y-%m-%d", None],
+            ["2015-04-31", "%Y-%m-%d", None],
         ],
     )
-    def test_day_not_in_month_coerce(self, cache, arg, format):
-        assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
+    def test_day_not_in_month_coerce(self, cache, arg, format, warning):
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
 
     def test_day_not_in_month_raise(self, cache):
         msg = "day is out of range for month"
         with pytest.raises(ValueError, match=msg):
-            to_datetime("2015-02-29", errors="raise", cache=cache)
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime("2015-02-29", errors="raise", cache=cache)
 
     @pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"])
     def test_day_not_in_month_raise_value(self, cache, arg):
@@ -2231,85 +2216,85 @@ def test_day_not_in_month_raise_value(self, cache, arg):
             to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache)
 
     @pytest.mark.parametrize(
-        "expected, format",
+        "expected, format, warning",
         [
-            ["2015-02-29", None],
-            ["2015-02-29", "%Y-%m-%d"],
-            ["2015-02-29", "%Y-%m-%d"],
-            ["2015-04-31", "%Y-%m-%d"],
+            ["2015-02-29", None, UserWarning],
+            ["2015-02-29", "%Y-%m-%d", None],
+            ["2015-02-29", "%Y-%m-%d", None],
+            ["2015-04-31", "%Y-%m-%d", None],
         ],
     )
-    def test_day_not_in_month_ignore(self, cache, expected, format):
-        result = to_datetime(expected, errors="ignore", format=format, cache=cache)
+    def test_day_not_in_month_ignore(self, cache, expected, format, warning):
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result = to_datetime(expected, errors="ignore", format=format, cache=cache)
         assert result == expected
 
 
 class TestDatetimeParsingWrappers:
     @pytest.mark.parametrize(
-        "date_str,expected",
-        list(
-            {
-                "2011-01-01": datetime(2011, 1, 1),
-                "2Q2005": datetime(2005, 4, 1),
-                "2Q05": datetime(2005, 4, 1),
-                "2005Q1": datetime(2005, 1, 1),
-                "05Q1": datetime(2005, 1, 1),
-                "2011Q3": datetime(2011, 7, 1),
-                "11Q3": datetime(2011, 7, 1),
-                "3Q2011": datetime(2011, 7, 1),
-                "3Q11": datetime(2011, 7, 1),
-                # quarterly without space
-                "2000Q4": datetime(2000, 10, 1),
-                "00Q4": datetime(2000, 10, 1),
-                "4Q2000": datetime(2000, 10, 1),
-                "4Q00": datetime(2000, 10, 1),
-                "2000q4": datetime(2000, 10, 1),
-                "2000-Q4": datetime(2000, 10, 1),
-                "00-Q4": datetime(2000, 10, 1),
-                "4Q-2000": datetime(2000, 10, 1),
-                "4Q-00": datetime(2000, 10, 1),
-                "00q4": datetime(2000, 10, 1),
-                "2005": datetime(2005, 1, 1),
-                "2005-11": datetime(2005, 11, 1),
-                "2005 11": datetime(2005, 11, 1),
-                "11-2005": datetime(2005, 11, 1),
-                "11 2005": datetime(2005, 11, 1),
-                "200511": datetime(2020, 5, 11),
-                "20051109": datetime(2005, 11, 9),
-                "20051109 10:15": datetime(2005, 11, 9, 10, 15),
-                "20051109 08H": datetime(2005, 11, 9, 8, 0),
-                "2005-11-09 10:15": datetime(2005, 11, 9, 10, 15),
-                "2005-11-09 08H": datetime(2005, 11, 9, 8, 0),
-                "2005/11/09 10:15": datetime(2005, 11, 9, 10, 15),
-                "2005/11/09 08H": datetime(2005, 11, 9, 8, 0),
-                "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, 36, 28),
-                "Thu Sep 25 2003": datetime(2003, 9, 25),
-                "Sep 25 2003": datetime(2003, 9, 25),
-                "January 1 2014": datetime(2014, 1, 1),
-                # GHE10537
-                "2014-06": datetime(2014, 6, 1),
-                "06-2014": datetime(2014, 6, 1),
-                "2014-6": datetime(2014, 6, 1),
-                "6-2014": datetime(2014, 6, 1),
-                "20010101 12": datetime(2001, 1, 1, 12),
-                "20010101 1234": datetime(2001, 1, 1, 12, 34),
-                "20010101 123456": datetime(2001, 1, 1, 12, 34, 56),
-            }.items()
-        ),
+        "date_str, expected, warning",
+        [
+            ("2011-01-01", datetime(2011, 1, 1), None),
+            ("2Q2005", datetime(2005, 4, 1), UserWarning),
+            ("2Q05", datetime(2005, 4, 1), UserWarning),
+            ("2005Q1", datetime(2005, 1, 1), UserWarning),
+            ("05Q1", datetime(2005, 1, 1), UserWarning),
+            ("2011Q3", datetime(2011, 7, 1), UserWarning),
+            ("11Q3", datetime(2011, 7, 1), UserWarning),
+            ("3Q2011", datetime(2011, 7, 1), UserWarning),
+            ("3Q11", datetime(2011, 7, 1), UserWarning),
+            # quarterly without space
+            ("2000Q4", datetime(2000, 10, 1), UserWarning),
+            ("00Q4", datetime(2000, 10, 1), UserWarning),
+            ("4Q2000", datetime(2000, 10, 1), UserWarning),
+            ("4Q00", datetime(2000, 10, 1), UserWarning),
+            ("2000q4", datetime(2000, 10, 1), UserWarning),
+            ("2000-Q4", datetime(2000, 10, 1), UserWarning),
+            ("00-Q4", datetime(2000, 10, 1), UserWarning),
+            ("4Q-2000", datetime(2000, 10, 1), UserWarning),
+            ("4Q-00", datetime(2000, 10, 1), UserWarning),
+            ("00q4", datetime(2000, 10, 1), UserWarning),
+            ("2005", datetime(2005, 1, 1), None),
+            ("2005-11", datetime(2005, 11, 1), UserWarning),
+            ("2005 11", datetime(2005, 11, 1), UserWarning),
+            ("11-2005", datetime(2005, 11, 1), UserWarning),
+            ("11 2005", datetime(2005, 11, 1), UserWarning),
+            ("200511", datetime(2020, 5, 11), UserWarning),
+            ("20051109", datetime(2005, 11, 9), None),
+            ("20051109 10:15", datetime(2005, 11, 9, 10, 15), None),
+            ("20051109 08H", datetime(2005, 11, 9, 8, 0), None),
+            ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15), None),
+            ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0), None),
+            ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15), None),
+            ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0), None),
+            ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28), None),
+            ("Thu Sep 25 2003", datetime(2003, 9, 25), None),
+            ("Sep 25 2003", datetime(2003, 9, 25), None),
+            ("January 1 2014", datetime(2014, 1, 1), None),
+            # GHE10537
+            ("2014-06", datetime(2014, 6, 1), UserWarning),
+            ("06-2014", datetime(2014, 6, 1), UserWarning),
+            ("2014-6", datetime(2014, 6, 1), UserWarning),
+            ("6-2014", datetime(2014, 6, 1), UserWarning),
+            ("20010101 12", datetime(2001, 1, 1, 12), None),
+            ("20010101 1234", datetime(2001, 1, 1, 12, 34), UserWarning),
+            ("20010101 123456", datetime(2001, 1, 1, 12, 34, 56), UserWarning),
+        ],
     )
-    def test_parsers(self, date_str, expected, cache):
+    def test_parsers(self, date_str, expected, warning, cache):
 
         # dateutil >= 2.5.0 defaults to yearfirst=True
         # https://github.com/dateutil/dateutil/issues/217
         yearfirst = True
 
         result1, _ = parsing.parse_time_string(date_str, yearfirst=yearfirst)
-        result2 = to_datetime(date_str, yearfirst=yearfirst)
-        result3 = to_datetime([date_str], yearfirst=yearfirst)
-        # result5 is used below
-        result4 = to_datetime(
-            np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache
-        )
+        with tm.assert_produces_warning(warning, match="Could not infer format"):
+            result2 = to_datetime(date_str, yearfirst=yearfirst)
+            result3 = to_datetime([date_str], yearfirst=yearfirst)
+            # result5 is used below
+            result4 = to_datetime(
+                np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache
+            )
         result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
         # result7 is used below
         result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
@@ -2418,9 +2403,10 @@ def test_parsers_dayfirst_yearfirst(
             result2 = Timestamp(date_str)
             assert result2 == expected
 
-        result3 = to_datetime(
-            date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache
-        )
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result3 = to_datetime(
+                date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache
+            )
 
         result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0]
 
@@ -2437,8 +2423,9 @@ def test_parsers_timestring(self, date_str, exp_def):
         exp_now = parse(date_str)
 
         result1, _ = parsing.parse_time_string(date_str)
-        result2 = to_datetime(date_str)
-        result3 = to_datetime([date_str])
+        with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
+            result2 = to_datetime(date_str)
+            result3 = to_datetime([date_str])
         result4 = Timestamp(date_str)
         result5 = DatetimeIndex([date_str])[0]
         # parse time string return time string based on default date
@@ -2602,17 +2589,23 @@ def test_incorrect_value_exception(self):
         with pytest.raises(
             ValueError, match="Unknown string format: yesterday present at position 1"
         ):
-            to_datetime(["today", "yesterday"])
+            with tm.assert_produces_warning(
+                UserWarning, match="Could not infer format"
+            ):
+                to_datetime(["today", "yesterday"])
 
-    @pytest.mark.parametrize("format", [None, "%Y-%m-%d %H:%M:%S"])
-    def test_to_datetime_out_of_bounds_with_format_arg(self, format):
+    @pytest.mark.parametrize(
+        "format, warning", [(None, UserWarning), ("%Y-%m-%d %H:%M:%S", None)]
+    )
+    def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
         # see gh-23830
         msg = (
             "Out of bounds nanosecond timestamp: 2417-10-27 00:00:00 "
             "present at position 0"
         )
         with pytest.raises(OutOfBoundsDatetime, match=msg):
-            to_datetime("2417-10-27 00:00:00", format=format)
+            with tm.assert_produces_warning(warning, match="Could not infer format"):
+                to_datetime("2417-10-27 00:00:00", format=format)
 
     @pytest.mark.parametrize(
         "arg, origin, expected_str",

From d67bd3576215ee65108f913375b3e353adc1a720 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Tue, 18 Oct 2022 09:35:49 +0100
Subject: [PATCH 06/12] :memo: update docs

---
 doc/source/user_guide/basics.rst     |  2 ++
 doc/source/user_guide/io.rst         | 31 +++++---------------------
 doc/source/user_guide/timeseries.rst | 27 +++++++----------------
 doc/source/whatsnew/v2.0.0.rst       | 33 ++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
index a34d4891b9d77..92fae28d3bdb3 100644
--- a/doc/source/user_guide/basics.rst
+++ b/doc/source/user_guide/basics.rst
@@ -2313,6 +2313,7 @@ useful if you are reading in data which is mostly of the desired dtype (e.g. num
 non-conforming elements intermixed that you want to represent as missing:
 
 .. ipython:: python
+   :okwarning:
 
     import datetime
 
@@ -2329,6 +2330,7 @@ The ``errors`` parameter has a third option of ``errors='ignore'``, which will s
 encounters any errors with the conversion to a desired data type:
 
 .. ipython:: python
+    :okwarning:
 
     import datetime
 
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 63e6b007f77a8..3f7abac3e5582 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1009,41 +1009,22 @@ To parse the mixed-timezone values as a datetime column, pass a partially-applie
 Inferring datetime format
 +++++++++++++++++++++++++
 
-If you have ``parse_dates`` enabled for some or all of your columns, and your
-datetime strings are all formatted the same way, you may get a large speed
-up by setting ``infer_datetime_format=True``.  If set, pandas will attempt
-to guess the format of your datetime strings, and then use a faster means
-of parsing the strings.  5-10x parsing speeds have been observed.  pandas
-will fallback to the usual parsing if either the format cannot be guessed
-or the format that was guessed cannot properly parse the entire column
-of strings.  So in general, ``infer_datetime_format`` should not have any
-negative consequences if enabled.
-
-Here are some examples of datetime strings that can be guessed (All
-representing December 30th, 2011 at 00:00:00):
-
-* "20111230"
-* "2011/12/30"
-* "20111230 00:00:00"
-* "12/30/2011 00:00:00"
-* "30/Dec/2011 00:00:00"
-* "30/December/2011 00:00:00"
-
-Note that ``infer_datetime_format`` is sensitive to ``dayfirst``.  With
-``dayfirst=True``, it will guess "01/12/2011" to be December 1st. With
-``dayfirst=False`` (default) it will guess "01/12/2011" to be January 12th.
+If you try to parse a column of date strings, pandas will attempt to guess the format
+from the first non-NaN element, and will then parse the rest of the column with that
+format.
 
 .. ipython:: python
 
-   # Try to infer the format for the index column
    df = pd.read_csv(
        "foo.csv",
        index_col=0,
        parse_dates=True,
-       infer_datetime_format=True,
    )
    df
 
+In the case that you have mixed datetime formats within the same column, you'll need to
+first read in the file, and then apply :func:`to_datetime` to each element.
+
 .. ipython:: python
    :suppress:
 
diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
index 474068e43a4d4..2710a22ec6161 100644
--- a/doc/source/user_guide/timeseries.rst
+++ b/doc/source/user_guide/timeseries.rst
@@ -13,17 +13,6 @@ a tremendous amount of new functionality for manipulating time series data.
 
 For example, pandas supports:
 
-Parsing time series information from various sources and formats
-
-.. ipython:: python
-
-   import datetime
-
-   dti = pd.to_datetime(
-       ["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)]
-   )
-   dti
-
 Generate sequences of fixed-frequency dates and time spans
 
 .. ipython:: python
@@ -132,6 +121,8 @@ time.
 
 .. ipython:: python
 
+   import datetime
+
    pd.Timestamp(datetime.datetime(2012, 5, 1))
    pd.Timestamp("2012-05-01")
    pd.Timestamp(2012, 5, 1)
@@ -196,26 +187,24 @@ is converted to a ``DatetimeIndex``:
 
 .. ipython:: python
 
-    pd.to_datetime(pd.Series(["Jul 31, 2009", "2010-01-10", None]))
+    pd.to_datetime(pd.Series(["Jul 31, 2009", "Jan 10, 2010", None]))
 
-    pd.to_datetime(["2005/11/23", "2010.12.31"])
+    pd.to_datetime(["2005/11/23", "2010/12/31"])
 
 If you use dates which start with the day first (i.e. European style),
 you can pass the ``dayfirst`` flag:
 
 .. ipython:: python
-   :okwarning:
+    :okwarning:
 
     pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)
-
-    pd.to_datetime(["14-01-2012", "01-14-2012"], dayfirst=True)
+    pd.to_datetime(["04-14-2012 10:00"], dayfirst=True)
 
 .. warning::
 
    You see in the above example that ``dayfirst`` isn't strict. If a date
    can't be parsed with the day being first it will be parsed as if
-   ``dayfirst`` were False, and in the case of parsing delimited date strings
-   (e.g. ``31-12-2012``) then a warning will also be raised.
+   ``dayfirst`` were False, and a warning will also be raised.
 
 If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``.
 ``Timestamp`` can also accept string input, but it doesn't accept string parsing
@@ -768,7 +757,7 @@ partially matching dates:
    rng2 = pd.date_range("2011-01-01", "2012-01-01", freq="W")
    ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)
 
-   ts2.truncate(before="2011-11", after="2011-12")
+   ts2.truncate(before="2011-11-01", after="2011-12-01")
    ts2["2011-11":"2011-12"]
 
 Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 80c8ad9a8b2eb..840c29ec8b09e 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -114,6 +114,39 @@ Optional libraries below the lowest tested version may still work, but are not c
 
 See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
 
+Datetimes are now parsed with a consistent format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:func:`to_datetime` now parses dates with a consistent format, which is guessed from the first non-NA value
+(unless ``format`` is specified). Previously, it would've guessed the format for each element individually.
+
+*Old behavior*:
+
+  .. code-block:: ipython
+
+     In [1]: ser = pd.Series(['13-01-2000', '12-01-2000'])
+     In [2]: pd.to_datetime(ser)
+     Out[2]:
+     0   2000-01-13
+     1   2000-12-01
+     dtype: datetime64[ns]
+
+*New behavior*:
+
+  .. ipython:: python
+    :okwarning:
+
+     ser = pd.Series(['13-01-2000', '12-01-2000'])
+     pd.to_datetime(ser)
+
+Note that this affects :func:`read_csv` as well.
+
+If you still need to parse dates with inconsistent formats, you'll need to apply :func:`to_datetime`
+to each element individually, e.g. ::
+
+     ser = pd.Series(['13-01-2000', '12 January 2000'])
+     ser.apply(pd.to_datetime)
+
 .. _whatsnew_200.api_breaking.other:
 
 Other API changes

From 98db2b5c7e5e4206d15b0348f4f37d429e0b1c78 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 19 Oct 2022 10:15:01 +0100
Subject: [PATCH 07/12] :memo: add example of reading csv file with mixed
 formats

---
 doc/source/user_guide/io.rst    | 9 ++++++++-
 doc/source/whatsnew/v2.0.0.rst  | 2 +-
 pandas/_libs/tslibs/parsing.pyx | 4 ++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 3f7abac3e5582..c2992236381c7 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1023,7 +1023,14 @@ format.
    df
 
 In the case that you have mixed datetime formats within the same column, you'll need to
-first read it in the file, and then apply :func:`to_datetime` to each element.
+first read it in as an object dtype and then apply :func:`to_datetime` to each element.
+
+.. ipython:: python
+
+   data = io.StringIO("date\n12 Jan 2000\n2000-01-13\n")
+   df = pd.read_csv(data)
+   df['date'] = df['date'].apply(pd.to_datetime)
+   df
 
 .. ipython:: python
    :suppress:
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 840c29ec8b09e..49f99592e69a8 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -118,7 +118,7 @@ Datetimes are now parsed with a consistent format
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 :func:`to_datetime` now parses dates with a consistent format, which is guessed from the first non-NA value
-(unless ``format`` is specified). Previously, it would've guessed the format for each element individually.
+(unless ``format`` is specified). Previously, it would have guessed the format for each element individually.
 
 *Old behavior*:
 
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 74de3502b73de..c9df9146240da 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -1117,13 +1117,13 @@ cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
         if (day_index > month_index) and dayfirst:
             warnings.warn(
                 f"Parsing dates in {format} format when dayfirst=True was specified. "
-                f"Pass `dayfirst=False` or specify a format to silence this warning.",
+                "Pass `dayfirst=False` or specify a format to silence this warning.",
                 stacklevel=find_stack_level(),
             )
         if (day_index < month_index) and not dayfirst:
             warnings.warn(
                 f"Parsing dates in {format} format when dayfirst=False was specified. "
-                f"Pass `dayfirst=True` or specify a format to silence this warning.",
+                "Pass `dayfirst=True` or specify a format to silence this warning.",
                 stacklevel=find_stack_level(),
             )
 

From cc307ab27b36c323e8ef11b78988b3df059d0f46 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 19 Oct 2022 10:24:42 +0100
Subject: [PATCH 08/12] :wastebasket: removed now outdated tests / clean inputs

---
 .../indexes/datetimes/test_constructors.py    | 22 ++--------
 pandas/tests/tools/test_to_datetime.py        | 43 +------------------
 2 files changed, 6 insertions(+), 59 deletions(-)

diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py
index c1039728f5b5e..a9491f90e80f0 100644
--- a/pandas/tests/indexes/datetimes/test_constructors.py
+++ b/pandas/tests/indexes/datetimes/test_constructors.py
@@ -1042,27 +1042,13 @@ def test_datetimeindex_constructor_misc(self):
         arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
         idx4 = DatetimeIndex(arr)
 
-        # Can't be parsed consistently, need to parse each element individually
-        arr = [
-            to_datetime(date_string)
-            for date_string in ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
-        ]
-        idx5 = DatetimeIndex(arr)
-
-        # Can't be parsed consistently, need to parse each element individually
-        arr = [
-            to_datetime(date_string)
-            for date_string in ["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]
-        ]
-        idx6 = DatetimeIndex(arr)
-
-        idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
-        idx8 = DatetimeIndex(
+        idx5 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
+        idx6 = DatetimeIndex(
             ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
         )
-        tm.assert_index_equal(idx7, idx8)
+        tm.assert_index_equal(idx5, idx6)
 
-        for other in [idx2, idx3, idx4, idx5, idx6]:
+        for other in [idx2, idx3, idx4]:
             assert (idx1.values == other.values).all()
 
         sdate = datetime(1999, 12, 25)
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index a2871e79dc7d9..e3b9e30e1923c 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1225,40 +1225,6 @@ def test_iso_8601_strings_with_different_offsets_utc(self):
         )
         tm.assert_index_equal(result, expected)
 
-    def test_iso8601_strings_mixed_offsets_with_naive(self):
-        # GH 24992
-        # Can't parse consistently, need to parse each element in loop.
-        result = DatetimeIndex(
-            [
-                to_datetime(string, utc=True)
-                for string in [
-                    "2018-11-28T00:00:00",
-                    "2018-11-28T00:00:00+12:00",
-                    "2018-11-28T00:00:00",
-                    "2018-11-28T00:00:00+06:00",
-                    "2018-11-28T00:00:00",
-                ]
-            ]
-        )
-        expected = to_datetime(
-            [
-                "2018-11-28T00:00:00",
-                "2018-11-27T12:00:00",
-                "2018-11-28T00:00:00",
-                "2018-11-27T18:00:00",
-                "2018-11-28T00:00:00",
-            ],
-            utc=True,
-        )
-        tm.assert_index_equal(result, expected)
-
-    def test_iso8601_strings_mixed_offsets_with_naive_reversed(self):
-        items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"]
-        # Can't parse consistently, need to parse each element in loop.
-        result = [to_datetime(item, utc=True) for item in items]
-        expected = [to_datetime(item, utc=True) for item in list(reversed(items))][::-1]
-        assert result == expected
-
     def test_mixed_offsets_with_native_datetime_raises(self):
         # GH 25978
 
@@ -1910,9 +1876,7 @@ def test_to_datetime_overflow(self):
     def test_string_na_nat_conversion(self, cache):
         # GH #999, #858
 
-        strings = np.array(
-            ["1/1/2000", "1/2/2000", np.nan, "1/4/2000, 12:34:56"], dtype=object
-        )
+        strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object)
 
         expected = np.empty(4, dtype="M8[ns]")
         for i, val in enumerate(strings):
@@ -1924,10 +1888,7 @@ def test_string_na_nat_conversion(self, cache):
         result = tslib.array_to_datetime(strings)[0]
         tm.assert_almost_equal(result, expected)
 
-        # Can't parse in consistent format, so need to convert each individually.
-        result2 = DatetimeIndex(
-            [to_datetime(string, cache=cache) for string in strings]
-        )
+        result2 = to_datetime(strings, cache=cache)
         assert isinstance(result2, DatetimeIndex)
         tm.assert_numpy_array_equal(result, result2.values)
 

From fc419d526ad1f7cf2a1174dae6f9385734e7283e Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 19 Oct 2022 15:21:11 +0100
Subject: [PATCH 09/12] make iso8601 fastpath respect exact

---
 f.py                                          | 92 +++++++++++++++++++
 pandas/_libs/tslib.pyx                        | 50 +++++++++-
 pandas/_libs/tslibs/conversion.pyx            | 13 ++-
 pandas/_libs/tslibs/np_datetime.pxd           | 11 +++
 pandas/_libs/tslibs/np_datetime.pyx           | 44 ++++++++-
 pandas/_libs/tslibs/parsing.pyx               | 13 ++-
 .../tslibs/src/datetime/np_datetime_strings.c | 55 ++++++++++-
 .../tslibs/src/datetime/np_datetime_strings.h | 13 ++-
 pandas/core/arrays/datetimes.py               |  8 ++
 pandas/core/tools/datetimes.py                | 27 ++++--
 pandas/tests/tools/test_to_datetime.py        | 92 ++++++++++++++++++-
 11 files changed, 396 insertions(+), 22 deletions(-)
 create mode 100644 f.py

diff --git a/f.py b/f.py
new file mode 100644
index 0000000000000..e2e151a6271d9
--- /dev/null
+++ b/f.py
@@ -0,0 +1,92 @@
+from typing import NamedTuple
+
+
+class ISO8601Info(NamedTuple):
+    format: str = b""
+    date_sep: str = b""
+    time_sep: str = b""
+    micro_or_tz: str = b""
+    year: bool = False
+    month: bool = False
+    day: bool = False
+    hour: bool = False
+    minute: bool = False
+    second: bool = False
+
+
+def format_is_iso(f: str):
+    """
+    Does format match the iso8601 set that can be handled by the C parser?
+    Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different
+    but must be consistent.  Leading 0s in dates and times are optional.
+
+    NOTE: this pure-Python check is provisional; it needs doing in C.
+    """
+    excluded_formats = ["%Y%m%d", "%Y%m", "%Y"]
+
+    if f in excluded_formats:
+        return ISO8601Info()
+    for date_sep in [" ", "/", "\\", "-", ".", ""]:
+        for time_sep in [" ", "T"]:
+            for micro_or_tz in ["", "%z", "%Z", ".%f", ".%f%z", ".%f%Z"]:
+                if f"%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}" == f:
+                    return ISO8601Info(
+                        format=f.encode("utf-8"),
+                        date_sep=date_sep.encode("utf-8"),
+                        time_sep=time_sep.encode("utf-8"),
+                        micro_or_tz=micro_or_tz.encode("utf-8"),
+                        year=True,
+                        month=True,
+                        day=True,
+                        hour=True,
+                        minute=True,
+                        second=True,
+                    )
+                elif f"%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M" == f:
+                    return ISO8601Info(
+                        format=f.encode("utf-8"),
+                        date_sep=date_sep.encode("utf-8"),
+                        time_sep=time_sep.encode("utf-8"),
+                        micro_or_tz=micro_or_tz.encode("utf-8"),
+                        year=True,
+                        month=True,
+                        day=True,
+                        hour=True,
+                        minute=True,
+                    )
+                elif f"%Y{date_sep}%m{date_sep}%d{time_sep}%H" == f:
+                    return ISO8601Info(
+                        format=f.encode("utf-8"),
+                        date_sep=date_sep.encode("utf-8"),
+                        time_sep=time_sep.encode("utf-8"),
+                        micro_or_tz=micro_or_tz.encode("utf-8"),
+                        year=True,
+                        month=True,
+                        day=True,
+                        hour=True,
+                    )
+                elif f"%Y{date_sep}%m{date_sep}%d" == f:
+                    return ISO8601Info(
+                        format=f.encode("utf-8"),
+                        date_sep=date_sep.encode("utf-8"),
+                        time_sep=time_sep.encode("utf-8"),
+                        micro_or_tz=micro_or_tz.encode("utf-8"),
+                        year=True,
+                        month=True,
+                        day=True,
+                    )
+                elif f"%Y{date_sep}%m" == f:
+                    return ISO8601Info(
+                        format=f.encode("utf-8"),
+                        date_sep=date_sep.encode("utf-8"),
+                        time_sep=time_sep.encode("utf-8"),
+                        micro_or_tz=micro_or_tz.encode("utf-8"),
+                        year=True,
+                        month=True,
+                    )
+    return ISO8601Info()
+
+
+if __name__ == "__main__":
+    print(format_is_iso("%Y-%m-%d %H:%M:%S%z"))
+# print(format_is_iso('%Y%m%d %H'))
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 03331f54db892..a4036023843ef 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -93,7 +93,19 @@ def _test_parse_iso8601(ts: str):
     elif ts == 'today':
         return Timestamp.now().normalize()
 
-    string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True)
+    string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True,
+                        format='',
+                        date_sep='',
+                        time_sep='',
+                        micro_or_tz='',
+                        year=False,
+                        month=False,
+                        day=False,
+                        hour=False,
+                        minute=False,
+                        second=False,
+                        exact=False,
+    )
     obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts)
     check_dts_bounds(&obj.dts)
     if out_local == 1:
@@ -449,6 +461,17 @@ cpdef array_to_datetime(
     bint utc=False,
     bint require_iso8601=False,
     bint allow_mixed=False,
+    const char *format='',
+    const char *date_sep='',
+    const char *time_sep='',
+    const char *micro_or_tz='',
+    bint year=False,
+    bint month=False,
+    bint day=False,
+    bint hour=False,
+    bint minute=False,
+    bint second=False,
+    bint exact=False,
 ):
     """
     Converts a 1D array of date-like values to a numpy array of either:
@@ -568,6 +591,16 @@ cpdef array_to_datetime(
                     iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
 
                 elif is_integer_object(val) or is_float_object(val):
+                    if require_iso8601:
+                        if is_coerce:
+                            iresult[i] = NPY_NAT
+                            continue
+                        elif is_raise:
+                            raise ValueError(
+                                f"time data \"{val}\" at position {i} doesn't match format {format.decode('utf-8')}"
+                            )
+                        return values, tz_out
+
                     # these must be ns unit by-definition
                     seen_integer = True
 
@@ -598,7 +631,18 @@ cpdef array_to_datetime(
 
                     string_to_dts_failed = string_to_dts(
                         val, &dts, &out_bestunit, &out_local,
-                        &out_tzoffset, False
+                        &out_tzoffset, False,
+                        format,
+                        date_sep=date_sep,
+                        time_sep=time_sep,
+                        micro_or_tz=micro_or_tz,
+                        year=year,
+                        month=month,
+                        day=day,
+                        hour=hour,
+                        minute=minute,
+                        second=second,
+                        exact=exact,
                     )
                     if string_to_dts_failed:
                         # An error at this point is a _parsing_ error
@@ -613,7 +657,7 @@ cpdef array_to_datetime(
                                 continue
                             elif is_raise:
                                 raise ValueError(
-                                    f"time data \"{val}\" at position {i} doesn't match format specified"
+                                    f"time data \"{val}\" at position {i} doesn't match format {format.decode('utf-8')}"
                                 )
                             return values, tz_out
 
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 923dfa3c54d26..a4d10703f4865 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -488,7 +488,18 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
     else:
         string_to_dts_failed = string_to_dts(
             ts, &dts, &out_bestunit, &out_local,
-            &out_tzoffset, False
+            &out_tzoffset, False,
+                        '',
+                        date_sep='',
+                        time_sep='',
+                        micro_or_tz='',
+                        year=False,
+                        month=False,
+                        day=False,
+                        hour=False,
+                        minute=False,
+                        second=False,
+                        exact=False,
         )
         if not string_to_dts_failed:
             try:
diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd
index e51bbd4e074e1..82363ef79e29b 100644
--- a/pandas/_libs/tslibs/np_datetime.pxd
+++ b/pandas/_libs/tslibs/np_datetime.pxd
@@ -95,6 +95,17 @@ cdef int string_to_dts(
     int* out_local,
     int* out_tzoffset,
     bint want_exc,
+    const char *format,
+    const char *date_sep,
+    const char *time_sep,
+    const char *micro_or_tz,
+    bint year,
+    bint month,
+    bint day,
+    bint hour,
+    bint minute,
+    bint second,
+    bint exact,
 ) except? -1
 
 cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index 07872050dc822..7749c77d13f3b 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -52,7 +52,19 @@ cdef extern from "src/datetime/np_datetime_strings.h":
     int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                                 npy_datetimestruct *out,
                                 NPY_DATETIMEUNIT *out_bestunit,
-                                int *out_local, int *out_tzoffset)
+                                int *out_local, int *out_tzoffset,
+        int format,
+        const char *date_sep,
+        const char *time_sep,
+        const char *micro_or_tz,
+        int year,
+        int month,
+        int day,
+        int hour,
+        int minute,
+        int second,
+        int exact
+        )
 
 
 # ----------------------------------------------------------------------
@@ -273,14 +285,40 @@ cdef inline int string_to_dts(
     int* out_local,
     int* out_tzoffset,
     bint want_exc,
+    const char *format,
+    const char *date_sep,
+    const char *time_sep,
+    const char *micro_or_tz,
+    bint year,
+    bint month,
+    bint day,
+    bint hour,
+    bint minute,
+    bint second,
+    bint exact,
 ) except? -1:
     cdef:
         Py_ssize_t length
+        Py_ssize_t format_length
         const char* buf
 
     buf = get_c_string_buf_and_size(val, &length)
-    return parse_iso_8601_datetime(buf, length, want_exc,
-                                   dts, out_bestunit, out_local, out_tzoffset)
+    format_length = len(format)
+    result = parse_iso_8601_datetime(buf, length, want_exc,
+                                   dts, out_bestunit, out_local, out_tzoffset,
+        format_length,
+        date_sep,
+        time_sep,
+        micro_or_tz,
+        year,
+        month,
+        day,
+        hour,
+        minute,
+        second,
+        exact,
+    )
+    return result
 
 
 cpdef ndarray astype_overflowsafe(
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index c9df9146240da..30f934ef9129c 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -409,7 +409,18 @@ cdef parse_datetime_string_with_reso(
     # TODO: does this render some/all of parse_delimited_date redundant?
     string_to_dts_failed = string_to_dts(
         date_string, &dts, &out_bestunit, &out_local,
-        &out_tzoffset, False
+        &out_tzoffset, False,
+        '',
+        '',
+        '',
+        '',
+        False,
+        False,
+        False,
+        False,
+        False,
+        False,
+        False,
     )
     if not string_to_dts_failed:
         if dts.ps != 0 or out_local:
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
index cfbaed01b57c9..f5977b8066ef0 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
@@ -69,7 +69,18 @@ This file implements string parsing and creation for NumPy datetime.
 int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                             npy_datetimestruct *out,
                             NPY_DATETIMEUNIT *out_bestunit,
-                            int *out_local, int *out_tzoffset) {
+                            int *out_local, int *out_tzoffset,
+        int format_length,
+        const char *date_sep,
+        const char *time_sep,
+        const char *micro_or_tz,
+        int year,
+        int month,
+        int day,
+        int hour,
+        int minute,
+        int second,
+        int exact) {
     int year_leap = 0;
     int i, numdigits;
     const char *substr;
@@ -134,6 +145,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     /* Check whether it's a leap-year */
     year_leap = is_leapyear(out->year);
 
+    /* If the format contains month but we're
+    already at the end of the string, error */
+    if ((format_length > 0) && month && (sublen == 0)) {
+        goto parse_error;
+    }
     /* Next character must be a separator, start of month, or end of string */
     if (sublen == 0) {
         if (out_local != NULL) {
@@ -154,6 +170,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         }
         has_ymd_sep = 1;
         ymd_sep = valid_ymd_sep[i];
+        if ((format_length > 0) && (ymd_sep != *date_sep)) {
+            goto parse_error;
+        }
         ++substr;
         --sublen;
         /* Cannot have trailing separator */
@@ -163,6 +182,12 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE MONTH */
+
+    /* If the format doesn't contain month, and there's still some
+    string to be parsed, and we're checking for an exact match, error */
+    if ((format_length > 0) && !month && exact) {
+        goto parse_error;
+    }
     /* First digit required */
     out->month = (*substr - '0');
     ++substr;
@@ -183,6 +208,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto error;
     }
 
+    if ((format_length > 0) && day && (sublen == 0)) {
+        goto parse_error;
+    }
     /* Next character must be the separator, start of day, or end of string */
     if (sublen == 0) {
         bestunit = NPY_FR_M;
@@ -206,6 +234,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE DAY */
+    if ((format_length > 0) && !day && exact) {
+        goto parse_error;
+    }
     /* First digit required */
     if (!isdigit(*substr)) {
         goto parse_error;
@@ -230,6 +261,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto error;
     }
 
+    if ((format_length > 0) && hour && (sublen == 0)) {
+        goto parse_error;
+    }
     /* Next character must be a 'T', ' ', or end of string */
     if (sublen == 0) {
         if (out_local != NULL) {
@@ -239,13 +273,18 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto finish;
     }
 
-    if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
+    if ((format_length > 0) && (*substr != *time_sep)) {
+        goto parse_error;
+    } else if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
         goto parse_error;
     }
     ++substr;
     --sublen;
 
     /* PARSE THE HOURS */
+    if ((format_length > 0) && !hour && exact) {
+        goto parse_error;
+    }
     /* First digit required */
     if (!isdigit(*substr)) {
         goto parse_error;
@@ -269,6 +308,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         }
     }
 
+    if ((format_length > 0) && minute && (sublen == 0)) {
+        goto parse_error;
+    }
     /* Next character must be a ':' or the end of the string */
     if (sublen == 0) {
         if (!hour_was_2_digits) {
@@ -294,6 +336,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE MINUTES */
+    if ((format_length > 0) && !minute && exact) {
+        goto parse_error;
+    }
     /* First digit required */
     out->min = (*substr - '0');
     ++substr;
@@ -315,6 +360,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto parse_error;
     }
 
+    if ((format_length > 0) && second && (sublen == 0)) {
+        goto parse_error;
+    }
     if (sublen == 0) {
         bestunit = NPY_FR_m;
         goto finish;
@@ -335,6 +383,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE SECONDS */
+    if ((format_length > 0) && !second && exact) {
+        goto parse_error;
+    }
     /* First digit required */
     out->sec = (*substr - '0');
     ++substr;
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
index 511d9a401fed2..7ebf3e981a787 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
@@ -58,7 +58,18 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc,
                         npy_datetimestruct *out,
                         NPY_DATETIMEUNIT *out_bestunit,
                         int *out_local,
-                        int *out_tzoffset);
+                        int *out_tzoffset,
+        int format_length,
+        const char *date_sep,
+        const char *time_sep,
+        const char *micro_or_tz,
+        int year,
+        int month,
+        int day,
+        int hour,
+        int minute,
+        int second,
+        int exact);
 
 /*
  * Provides a string length to use for converting datetime
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index ca0a745c180e9..61e8dcadf7dc9 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -78,6 +78,7 @@
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
 
+from f import ISO8601Info
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import generate_regular_range
 import pandas.core.common as com
@@ -2180,6 +2181,8 @@ def objects_to_datetime64ns(
     require_iso8601: bool = False,
     allow_object: bool = False,
     allow_mixed: bool = False,
+    iso_info=ISO8601Info(),
+    exact: bool = False,
 ):
     """
     Convert data to array of timestamps.
@@ -2193,11 +2196,14 @@ def objects_to_datetime64ns(
         Whether to convert timezone-aware timestamps to UTC.
     errors : {'raise', 'ignore', 'coerce'}
     require_iso8601 : bool, default False
+        If True, then only try parsing in ISO8601 format, and skip other formats.
     allow_object : bool
         Whether to return an object-dtype ndarray instead of raising if the
         data contains more than one timezone.
     allow_mixed : bool, default False
         Interpret integers as timestamps when datetime objects are also present.
+    iso_info : ISO8601Info
+        Info about how to parse the ISO8601-formatted string.
 
     Returns
     -------
@@ -2227,6 +2233,8 @@ def objects_to_datetime64ns(
             yearfirst=yearfirst,
             require_iso8601=require_iso8601,
             allow_mixed=allow_mixed,
+            **iso_info._asdict(),
+            exact=exact,
         )
         result = result.reshape(data.shape, order=order)
     except OverflowError as err:
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 41feb153978d4..7607f637cabc6 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -32,9 +32,8 @@
     parsing,
     timezones,
 )
-from pandas._libs.tslibs.parsing import (
+from pandas._libs.tslibs.parsing import (  # format_is_iso,
     DateParseError,
-    format_is_iso,
     guess_datetime_format,
 )
 from pandas._libs.tslibs.strptime import array_strptime
@@ -65,6 +64,10 @@
 )
 from pandas.core.dtypes.missing import notna
 
+from f import (
+    ISO8601Info,
+    format_is_iso,
+)
 from pandas.arrays import (
     DatetimeArray,
     IntegerArray,
@@ -424,19 +427,21 @@ def _convert_listlike_datetimes(
         raise
 
     arg = ensure_object(arg)
-    require_iso8601 = False
 
     if format is None:
         format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
 
-    # There is a special fast-path for iso8601 formatted
-    # datetime strings, so in those cases don't use the inferred
-    # format because this path makes process slower in this
-    # special case
-    if format is not None and format_is_iso(format):
-        require_iso8601 = True
-        format = None
     if format is not None:
+        iso_info = format_is_iso(format)
+        require_iso8601 = True
+    else:
+        iso_info = ISO8601Info()
+        require_iso8601 = False
+    if format is not None and not iso_info.format:
+        # There is a special fast-path for iso8601 formatted
+        # datetime strings, so only formats that are not iso8601
+        # are handled here; using this path for iso8601 formats
+        # would make the process slower in that special case
         return _to_datetime_with_format(arg, orig_arg, name, tz, format, exact, errors)
 
     utc = tz == "utc"
@@ -448,6 +453,8 @@ def _convert_listlike_datetimes(
         errors=errors,
         require_iso8601=require_iso8601,
         allow_object=True,
+        iso_info=iso_info,
+        exact=exact,
     )
 
     if tz_parsed is not None:
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index e3b9e30e1923c..77946f0570256 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1733,6 +1733,105 @@ def test_to_datetime_iso8601(self, cache, arg, exp_str):
         exp = Timestamp(exp_str)
         assert result[0] == exp
 
+    # Inputs with fewer components than ``format`` (or a non-string input)
+    # must raise, whether or not ``exact`` is set.
+    @pytest.mark.parametrize(
+        "input, format",
+        [
+            ("2012", "%Y-%m"),
+            ("2012-01", "%Y-%m-%d"),
+            ("2012-01-01", "%Y-%m-%d %H"),
+            ("2012-01-01 10", "%Y-%m-%d %H:%M"),
+            ("2012-01-01 10:00", "%Y-%m-%d %H:%M:%S"),
+            (0, "%Y-%m-%d"),
+        ],
+    )
+    @pytest.mark.parametrize("exact", [True, False])
+    def test_to_datetime_iso8601_fails(self, input, format, exact):
+        with pytest.raises(
+            ValueError,
+            match=rf"time data \"{input}\" at position 0 doesn't match format {format}",
+        ):
+            to_datetime(input, format=format, exact=exact)
+
+    # Inputs with more components than ``format`` must raise when ``exact``
+    # is left at its default.
+    @pytest.mark.parametrize(
+        "input, format",
+        [
+            ("2012-01-01", "%Y-%m"),
+            ("2012-01-01 10", "%Y-%m-%d"),
+            ("2012-01-01 10:00", "%Y-%m-%d %H"),
+            ("2012-01-01 10:00:00", "%Y-%m-%d %H:%M"),
+            (0, "%Y-%m-%d"),
+        ],
+    )
+    def test_to_datetime_iso8601_exact_fails(self, input, format):
+        with pytest.raises(
+            ValueError,
+            match=rf"time data \"{input}\" at position 0 doesn't match format {format}",
+        ):
+            to_datetime(input, format=format)
+
+    # The same over-long string inputs are accepted once ``exact=False``.
+    @pytest.mark.parametrize(
+        "input, format",
+        [
+            ("2012-01-01", "%Y-%m"),
+            ("2012-01-01 10", "%Y-%m-%d"),
+            ("2012-01-01 10:00", "%Y-%m-%d %H"),
+            ("2012-01-01 10:00:00", "%Y-%m-%d %H:%M"),
+        ],
+    )
+    def test_to_datetime_iso8601_non_exact(self, input, format):
+        to_datetime(input, format=format, exact=False)
+
+    # A date separator or date/time separator that differs from the one in
+    # ``format`` must raise.
+    @pytest.mark.parametrize(
+        "input, format",
+        [
+            ("2020-01", "%Y/%m"),
+            ("2020-01-01", "%Y/%m/%d"),
+            ("2020-01-01 00", "%Y/%m/%dT%H"),
+            ("2020-01-01T00", "%Y/%m/%d %H"),
+            ("2020-01-01 00:00", "%Y/%m/%dT%H:%M"),
+            ("2020-01-01T00:00", "%Y/%m/%d %H:%M"),
+            ("2020-01-01 00:00:00", "%Y/%m/%dT%H:%M:%S"),
+            ("2020-01-01T00:00:00", "%Y/%m/%d %H:%M:%S"),
+        ],
+    )
+    def test_to_datetime_iso8601_separator(self, input, format):
+        with pytest.raises(
+            ValueError,
+            match=(
+                rf"time data \"{input}\" at position 0 doesn\'t match format {format}"
+            ),
+        ):
+            to_datetime(input, format=format)
+
+    # Inputs matching ``format`` parse without error; fractional seconds are
+    # accepted here even when ``format`` stops at ``%S``.
+    @pytest.mark.parametrize(
+        "input, format",
+        [
+            ("2020-01", "%Y-%m"),
+            ("2020-01-01", "%Y-%m-%d"),
+            ("2020-01-01 00", "%Y-%m-%d %H"),
+            ("2020-01-01T00", "%Y-%m-%dT%H"),
+            ("2020-01-01 00:00", "%Y-%m-%d %H:%M"),
+            ("2020-01-01T00:00", "%Y-%m-%dT%H:%M"),
+            ("2020-01-01 00:00:00", "%Y-%m-%d %H:%M:%S"),
+            ("2020-01-01T00:00:00", "%Y-%m-%dT%H:%M:%S"),
+            ("2020-01-01T00:00:00.000", "%Y-%m-%dT%H:%M:%S"),
+            ("2020-01-01T00:00:00.000", "%Y-%m-%dT%H:%M:%S.%f"),
+            ("2020-01-01T00:00:00.000000", "%Y-%m-%dT%H:%M:%S.%f"),
+            ("2020-01-01T00:00:00.000000000", "%Y-%m-%dT%H:%M:%S.%f"),
+        ],
+    )
+    def test_to_datetime_iso8601_valid(self, input, format):
+        to_datetime(input, format=format)
+
     def test_to_datetime_default(self, cache):
         rs = to_datetime("2001", cache=cache)
         xp = datetime(2001, 1, 1)
@@ -2172,7 +2262,7 @@ def test_day_not_in_month_raise(self, cache):
 
     @pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"])
     def test_day_not_in_month_raise_value(self, cache, arg):
-        msg = f'time data "{arg}" at position 0 doesn\'t match format specified'
+        msg = f'time data "{arg}" at position 0 doesn\'t match format %Y-%m-%d'
         with pytest.raises(ValueError, match=msg):
             to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache)
 

From de10e598feff0f21d7d6d86bb71efaa0cf520bf7 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 19 Oct 2022 19:57:48 +0100
Subject: [PATCH 10/12] fixup test

---
 pandas/tests/io/parser/dtypes/test_categorical.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py
index 3b8c520004f12..cc23c95433148 100644
--- a/pandas/tests/io/parser/dtypes/test_categorical.py
+++ b/pandas/tests/io/parser/dtypes/test_categorical.py
@@ -263,7 +263,15 @@ def test_categorical_coerces_timestamp(all_parsers):
     dtype = {"b": CategoricalDtype([Timestamp("2014")])}
 
     data = "b\n2014-01-01\n2014-01-01T00:00:00"
-    expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)})
+    if parser.engine == "pyarrow":
+        # pyarrow parses the data, and then
+        # converts to the dtypes
+        expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)})
+    else:
+        # pandas parses the data as the dtype as it reads it,
+        # but the second row doesn't respect the format inferred
+        # from the first row (%Y-%m-%d)
+        expected = DataFrame({"b": Categorical([Timestamp("2014"), pd.NaT])})
 
     result = parser.read_csv(StringIO(data), dtype=dtype)
     tm.assert_frame_equal(result, expected)

From 594f3d49bbb1762e589a5d55219d979174e03850 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Thu, 20 Oct 2022 15:14:28 +0100
Subject: [PATCH 11/12] use struct

---
 pandas/_libs/tslib.pxd                        |  20 +++
 pandas/_libs/tslib.pyx                        |  52 +++----
 pandas/_libs/tslibs/conversion.pxd            |  15 ++
 pandas/_libs/tslibs/conversion.pyx            |  29 ++--
 pandas/_libs/tslibs/np_datetime.pxd           |  28 ++--
 pandas/_libs/tslibs/np_datetime.pyx           |  53 ++-----
 pandas/_libs/tslibs/parsing.pxd               |  21 +++
 pandas/_libs/tslibs/parsing.pyx               | 146 ++++++++++++++----
 .../tslibs/src/datetime/np_datetime_strings.c |  36 ++---
 .../tslibs/src/datetime/np_datetime_strings.h |  28 ++--
 pandas/core/arrays/datetimes.py               |  10 +-
 pandas/core/tools/datetimes.py                |  12 +-
 12 files changed, 280 insertions(+), 170 deletions(-)
 create mode 100644 pandas/_libs/tslib.pxd

diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd
new file mode 100644
index 0000000000000..69ad8a0e10f4c
--- /dev/null
+++ b/pandas/_libs/tslib.pxd
@@ -0,0 +1,20 @@
+from pandas._libs.tslibs.np_datetime cimport (
+    NPY_DATETIMEUNIT,
+    npy_datetimestruct,
+)
+
+
+cdef extern from "src/datetime/np_datetime_strings.h":
+    ctypedef struct ISOInfo:
+        const char *format
+        int format_len
+        const char *date_sep
+        const char *time_sep
+        const char *micro_or_tz
+        int year
+        int month
+        int day
+        int hour
+        int minute
+        int second
+        int exact
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index a4036023843ef..5c150c5c7a958 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -85,16 +85,11 @@ def _test_parse_iso8601(ts: str):
         _TSObject obj
         int out_local = 0, out_tzoffset = 0
         NPY_DATETIMEUNIT out_bestunit
+        ISOInfo iso_info
 
-    obj = _TSObject()
-
-    if ts == 'now':
-        return Timestamp.utcnow()
-    elif ts == 'today':
-        return Timestamp.now().normalize()
-
-    string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True,
+    iso_info = ISOInfo(
                         format='',
+                        format_len=0,
                         date_sep='',
                         time_sep='',
                         micro_or_tz='',
@@ -105,7 +100,16 @@ def _test_parse_iso8601(ts: str):
                         minute=False,
                         second=False,
                         exact=False,
-    )
+                        )
+
+    obj = _TSObject()
+
+    if ts == 'now':
+        return Timestamp.utcnow()
+    elif ts == 'today':
+        return Timestamp.now().normalize()
+
+    string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True, &iso_info)
     obj.value = npy_datetimestruct_to_datetime(NPY_FR_ns, &obj.dts)
     check_dts_bounds(&obj.dts)
     if out_local == 1:
@@ -455,23 +459,13 @@ def first_non_null(values: ndarray) -> int:
 @cython.boundscheck(False)
 cpdef array_to_datetime(
     ndarray[object] values,
+    ISOInfo iso_info,
     str errors='raise',
     bint dayfirst=False,
     bint yearfirst=False,
     bint utc=False,
     bint require_iso8601=False,
     bint allow_mixed=False,
-    const char *format='',
-    const char *date_sep='',
-    const char *time_sep='',
-    const char *micro_or_tz='',
-    bint year=False,
-    bint month=False,
-    bint day=False,
-    bint hour=False,
-    bint minute=False,
-    bint second=False,
-    bint exact=False,
 ):
     """
     Converts a 1D array of date-like values to a numpy array of either:
@@ -533,6 +527,7 @@ cpdef array_to_datetime(
         tzinfo tz_out = None
         bint found_tz = False, found_naive = False
 
+
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 
@@ -597,7 +592,7 @@ cpdef array_to_datetime(
                             continue
                         elif is_raise:
                             raise ValueError(
-                                f"time data \"{val}\" at position {i} doesn't match format {format.decode('utf-8')}"
+                                f"time data \"{val}\" at position {i} doesn't match format {iso_info.format.decode('utf-8')}"
                             )
                         return values, tz_out
 
@@ -631,18 +626,7 @@ cpdef array_to_datetime(
 
                     string_to_dts_failed = string_to_dts(
                         val, &dts, &out_bestunit, &out_local,
-                        &out_tzoffset, False,
-                        format,
-                        date_sep=date_sep,
-                        time_sep=time_sep,
-                        micro_or_tz=micro_or_tz,
-                        year=year,
-                        month=month,
-                        day=day,
-                        hour=hour,
-                        minute=minute,
-                        second=second,
-                        exact=exact,
+                        &out_tzoffset, False, &iso_info,
                     )
                     if string_to_dts_failed:
                         # An error at this point is a _parsing_ error
@@ -657,7 +641,7 @@ cpdef array_to_datetime(
                                 continue
                             elif is_raise:
                                 raise ValueError(
-                                    f"time data \"{val}\" at position {i} doesn't match format {format.decode('utf-8')}"
+                                    f"time data \"{val}\" at position {i} doesn't match format {iso_info.format.decode('utf-8')}"
                                 )
                             return values, tz_out
 
diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
index c285b248f7a5b..e4db12a412523 100644
--- a/pandas/_libs/tslibs/conversion.pxd
+++ b/pandas/_libs/tslibs/conversion.pxd
@@ -40,3 +40,18 @@ cdef int64_t cast_from_unit(object ts, str unit) except? -1
 cpdef (int64_t, int) precision_from_unit(str unit)
 
 cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
+
+cdef extern from "src/datetime/np_datetime_strings.h":
+    ctypedef struct ISOInfo:
+        const char *format
+        int format_len
+        const char *date_sep
+        const char *time_sep
+        const char *micro_or_tz
+        int year
+        int month
+        int day
+        int hour
+        int minute
+        int second
+        int exact
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index a4d10703f4865..acf1dc005cb9b 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -469,6 +469,22 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
         datetime dt
         int64_t ival
         NPY_DATETIMEUNIT out_bestunit
+        ISOInfo iso_info
+
+    iso_info = ISOInfo(
+        format='',
+        format_len=0,
+                        date_sep='',
+                        time_sep='',
+                        micro_or_tz='',
+                        year=False,
+                        month=False,
+                        day=False,
+                        hour=False,
+                        minute=False,
+                        second=False,
+                        exact=False,
+    )
 
     if len(ts) == 0 or ts in nat_strings:
         ts = NaT
@@ -488,18 +504,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
     else:
         string_to_dts_failed = string_to_dts(
             ts, &dts, &out_bestunit, &out_local,
-            &out_tzoffset, False,
-                        '',
-                        date_sep='',
-                        time_sep='',
-                        micro_or_tz='',
-                        year=False,
-                        month=False,
-                        day=False,
-                        hour=False,
-                        minute=False,
-                        second=False,
-                        exact=False,
+            &out_tzoffset, False, &iso_info,
         )
         if not string_to_dts_failed:
             try:
diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd
index 82363ef79e29b..bc67720897d54 100644
--- a/pandas/_libs/tslibs/np_datetime.pxd
+++ b/pandas/_libs/tslibs/np_datetime.pxd
@@ -95,17 +95,7 @@ cdef int string_to_dts(
     int* out_local,
     int* out_tzoffset,
     bint want_exc,
-    const char *format,
-    const char *date_sep,
-    const char *time_sep,
-    const char *micro_or_tz,
-    bint year,
-    bint month,
-    bint day,
-    bint hour,
-    bint minute,
-    bint second,
-    bint exact,
+    ISOInfo* iso_info,
 ) except? -1
 
 cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype)
@@ -129,3 +119,19 @@ cdef int64_t convert_reso(
     NPY_DATETIMEUNIT to_reso,
     bint round_ok,
 ) except? -1
+
+cdef extern from "src/datetime/np_datetime_strings.h":
+
+    ctypedef struct ISOInfo:
+        const char *format
+        int format_len
+        const char *date_sep
+        const char *time_sep
+        const char *micro_or_tz
+        int year
+        int month
+        int day
+        int hour
+        int minute
+        int second
+        int exact
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index 7749c77d13f3b..f68be361e5d35 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -48,23 +48,6 @@ cdef extern from "src/datetime/np_datetime.h":
 
     PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype);
 
-cdef extern from "src/datetime/np_datetime_strings.h":
-    int parse_iso_8601_datetime(const char *str, int len, int want_exc,
-                                npy_datetimestruct *out,
-                                NPY_DATETIMEUNIT *out_bestunit,
-                                int *out_local, int *out_tzoffset,
-        int format,
-        const char *date_sep,
-        const char *time_sep,
-        const char *micro_or_tz,
-        int year,
-        int month,
-        int day,
-        int hour,
-        int minute,
-        int second,
-        int exact
-        )
 
 
 # ----------------------------------------------------------------------
@@ -285,38 +268,16 @@ cdef inline int string_to_dts(
     int* out_local,
     int* out_tzoffset,
     bint want_exc,
-    const char *format,
-    const char *date_sep,
-    const char *time_sep,
-    const char *micro_or_tz,
-    bint year,
-    bint month,
-    bint day,
-    bint hour,
-    bint minute,
-    bint second,
-    bint exact,
+    ISOInfo* iso_info,
 ) except? -1:
     cdef:
         Py_ssize_t length
-        Py_ssize_t format_length
         const char* buf
 
+
     buf = get_c_string_buf_and_size(val, &length)
-    format_length = len(format)
     result = parse_iso_8601_datetime(buf, length, want_exc,
-                                   dts, out_bestunit, out_local, out_tzoffset,
-        format_length,
-        date_sep,
-        time_sep,
-        micro_or_tz,
-        year,
-        month,
-        day,
-        hour,
-        minute,
-        second,
-        exact,
+                                   dts, out_bestunit, out_local, out_tzoffset, iso_info
     )
     return result
 
@@ -640,3 +601,11 @@ cdef int64_t _convert_reso_with_dtstruct(
     pandas_datetime_to_datetimestruct(value, from_unit, &dts)
     check_dts_bounds(&dts, to_unit)
     return npy_datetimestruct_to_datetime(to_unit, &dts)
+
+cdef extern from "src/datetime/np_datetime_strings.h":
+    int parse_iso_8601_datetime(const char *str, int len, int want_exc,
+                                npy_datetimestruct *out,
+                                NPY_DATETIMEUNIT *out_bestunit,
+                                int *out_local, int *out_tzoffset,
+                                ISOInfo *iso_info
+        )
diff --git a/pandas/_libs/tslibs/parsing.pxd b/pandas/_libs/tslibs/parsing.pxd
index 25667f00e42b5..a84cfa2523379 100644
--- a/pandas/_libs/tslibs/parsing.pxd
+++ b/pandas/_libs/tslibs/parsing.pxd
@@ -1,3 +1,24 @@
 
 cpdef str get_rule_month(str source)
 cpdef quarter_to_myear(int year, int quarter, str freq)
+cpdef ISOInfo null_iso_info()
+from pandas._libs.tslibs.np_datetime cimport (
+    NPY_DATETIMEUNIT,
+    npy_datetimestruct,
+)
+
+
+cdef extern from "src/datetime/np_datetime_strings.h":
+    ctypedef struct ISOInfo:
+        const char *format
+        int format_len
+        const char *date_sep
+        const char *time_sep
+        const char *micro_or_tz
+        int year
+        int month
+        int day
+        int hour
+        int minute
+        int second
+        int exact
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 30f934ef9129c..c76646b8b1f46 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -397,6 +397,22 @@ cdef parse_datetime_string_with_reso(
         NPY_DATETIMEUNIT out_bestunit
         int out_local
         int out_tzoffset
+        ISOInfo iso_info
+
+    iso_info = ISOInfo(
+                        format='',
+                        format_len=0,
+                        date_sep='',
+                        time_sep='',
+                        micro_or_tz='',
+                        year=False,
+                        month=False,
+                        day=False,
+                        hour=False,
+                        minute=False,
+                        second=False,
+                        exact=False,
+    )
 
     if not _does_string_look_like_datetime(date_string):
         raise ValueError(f'Given date string {date_string} not likely a datetime')
@@ -409,18 +425,7 @@ cdef parse_datetime_string_with_reso(
     # TODO: does this render some/all of parse_delimited_date redundant?
     string_to_dts_failed = string_to_dts(
         date_string, &dts, &out_bestunit, &out_local,
-        &out_tzoffset, False,
-        '',
-        '',
-        '',
-        '',
-        False,
-        False,
-        False,
-        False,
-        False,
-        False,
-        False,
+        &out_tzoffset, False, &iso_info,
     )
     if not string_to_dts_failed:
         if dts.ps != 0 or out_local:
@@ -933,26 +938,82 @@ class _timelex:
 
 _DATEUTIL_LEXER_SPLIT = _timelex.split
 
+
+cpdef ISOInfo null_iso_info():
+    """
+    Return an ISOInfo meaning "no explicit ISO8601 format to enforce".
+
+    ``format_len`` is 0, the value that disables the format checks in
+    ``parse_iso_8601_datetime`` (they are guarded by ``format_len > 0``).
+    """
+    # Bytes literals have static storage, so these ``char*`` never dangle.
+    return ISOInfo(
+        format=b"",
+        format_len=0,
+        date_sep=b"",
+        time_sep=b"",
+        micro_or_tz=b"",
+        year=False,
+        month=False,
+        day=False,
+        hour=False,
+        minute=False,
+        second=False,
+        exact=False,
+    )
+
 
-def format_is_iso(f: str) -> bint:
+def format_is_iso(f: str, bint exact) -> ISOInfo:
     """
     Does format match the iso8601 set that can be handled by the C parser?
     Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different
     but must be consistent.  Leading 0s in dates and times are optional.
     """
-    iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}'.format
-    excluded_formats = ['%Y%m%d', '%Y%m', '%Y']
-
-    for date_sep in [' ', '/', '\\', '-', '.', '']:
-        for time_sep in [' ', 'T']:
-            for micro_or_tz in ['', '%z', '%Z', '.%f', '.%f%z', '.%f%Z']:
-                if (iso_template(date_sep=date_sep,
-                                 time_sep=time_sep,
-                                 micro_or_tz=micro_or_tz,
-                                 ).startswith(f) and f not in excluded_formats):
-                    return True
-    return False
-
+    excluded_formats = ["%Y%m%d", "%Y%m", "%Y"]
+    if f in excluded_formats:
+        return null_iso_info()
+
+    # ISOInfo only holds raw ``char*`` pointers. Bind every encoded string to
+    # a local name so the bytes object stays alive while the struct is in use;
+    # pointing into a temporary like ``f.encode(...)`` would leave it dangling.
+    f_bytes = f.encode("utf-8")
+
+    for date_sep in [" ", "/", "\\", "-", ".", ""]:
+        ymd = f"%Y{date_sep}%m{date_sep}%d"
+        for time_sep in [" ", "T"]:
+            for micro_or_tz in ["", "%z", "%Z", ".%f", ".%f%z", ".%f%Z"]:
+                # Candidates from most to least precise, each paired with the
+                # number of leading fields (year, month, day, ...) it contains.
+                candidates = [
+                    (f"{ymd}{time_sep}%H:%M:%S{micro_or_tz}", 6),
+                    (f"{ymd}{time_sep}%H:%M", 5),
+                    (f"{ymd}{time_sep}%H", 4),
+                    (ymd, 3),
+                    (f"%Y{date_sep}%m", 2),
+                ]
+                for candidate, n_fields in candidates:
+                    if candidate != f:
+                        continue
+                    date_sep_bytes = date_sep.encode("utf-8")
+                    time_sep_bytes = time_sep.encode("utf-8")
+                    micro_or_tz_bytes = micro_or_tz.encode("utf-8")
+                    return ISOInfo(
+                        format=f_bytes,
+                        format_len=len(f),
+                        date_sep=date_sep_bytes,
+                        time_sep=time_sep_bytes,
+                        micro_or_tz=micro_or_tz_bytes,
+                        year=True,
+                        month=True,
+                        day=n_fields >= 3,
+                        hour=n_fields >= 4,
+                        minute=n_fields >= 5,
+                        second=n_fields >= 6,
+                        exact=exact,
+                    )
+
+    return null_iso_info()
+
 
 def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
     """
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
index f5977b8066ef0..60063e3432d4d 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
@@ -70,17 +70,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
                             npy_datetimestruct *out,
                             NPY_DATETIMEUNIT *out_bestunit,
                             int *out_local, int *out_tzoffset,
-        int format_length,
-        const char *date_sep,
-        const char *time_sep,
-        const char *micro_or_tz,
-        int year,
-        int month,
-        int day,
-        int hour,
-        int minute,
-        int second,
-        int exact) {
+        ISOInfo *iso_info) {
     int year_leap = 0;
     int i, numdigits;
     const char *substr;
@@ -147,7 +137,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
 
     /* If the format contains month but we're
     already at the end of the string, error */
-    if ((format_length > 0) && month && (sublen == 0)) {
+    if ((iso_info->format_len > 0) && iso_info->month && (sublen == 0)) {
         goto parse_error;
     }
     /* Next character must be a separator, start of month, or end of string */
@@ -170,7 +160,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         }
         has_ymd_sep = 1;
         ymd_sep = valid_ymd_sep[i];
-        if ((format_length > 0) && (ymd_sep != *date_sep)) {
+        if ((iso_info->format_len > 0) && (ymd_sep != *iso_info->date_sep)) {
             goto parse_error;
         }
         ++substr;
@@ -185,7 +175,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
 
     /* If the format doesn't contain month, and there's still some
     string to be parsed, and we're not checking for an exact match, error*/
-    if ((format_length > 0) && !month && exact) {
+    if ((iso_info->format_len > 0) && !iso_info->month && iso_info->exact) {
         goto parse_error;
     }
     /* First digit required */
@@ -208,7 +198,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto error;
     }
 
-    if ((format_length > 0) && day && (sublen == 0)) {
+    if ((iso_info->format_len > 0) && iso_info->day && (sublen == 0)) {
         goto parse_error;
     }
     /* Next character must be the separator, start of day, or end of string */
@@ -234,7 +224,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE DAY */
-    if ((format_length > 0) && !day && exact) {
+    if ((iso_info->format_len > 0) && !iso_info->day && iso_info->exact) {
         goto parse_error;
     }
     /* First digit required */
@@ -261,7 +251,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto error;
     }
 
-    if ((format_length > 0) && hour && (sublen == 0)) {
+    if ((iso_info->format_len > 0) && iso_info->hour && (sublen == 0)) {
         goto parse_error;
     }
     /* Next character must be a 'T', ' ', or end of string */
@@ -273,7 +263,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto finish;
     }
 
-    if ((format_length > 0) && (*substr != *time_sep)) {
+    if ((iso_info->format_len > 0) && (*substr != *iso_info->time_sep)) {
         goto parse_error;
     } else if ((*substr != 'T' && *substr != ' ') || sublen == 1) {
         goto parse_error;
@@ -282,7 +272,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     --sublen;
 
     /* PARSE THE HOURS */
-    if ((format_length > 0) && !hour && exact) {
+    if ((iso_info->format_len > 0) && !iso_info->hour && iso_info->exact) {
         goto parse_error;
     }
     /* First digit required */
@@ -308,7 +298,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         }
     }
 
-    if ((format_length > 0) && minute && (sublen == 0)) {
+    if ((iso_info->format_len > 0) && iso_info->minute && (sublen == 0)) {
         goto parse_error;
     }
     /* Next character must be a ':' or the end of the string */
@@ -336,7 +326,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE MINUTES */
-    if ((format_length > 0) && !minute && exact) {
+    if ((iso_info->format_len > 0) && !iso_info->minute && iso_info->exact) {
         goto parse_error;
     }
     /* First digit required */
@@ -360,7 +350,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
         goto parse_error;
     }
 
-    if ((format_length > 0) && second && (sublen == 0)) {
+    if ((iso_info->format_len > 0) && iso_info->second && (sublen == 0)) {
         goto parse_error;
     }
     if (sublen == 0) {
@@ -383,7 +373,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
     }
 
     /* PARSE THE SECONDS */
-    if ((format_length > 0) && !second && exact) {
+    if ((iso_info->format_len > 0) && !iso_info->second && iso_info->exact) {
         goto parse_error;
     }
     /* First digit required */
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
index 7ebf3e981a787..0e9ad256e0707 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
@@ -53,23 +53,29 @@ This file implements string parsing and creation for NumPy datetime.
  *
  * Returns 0 on success, -1 on failure.
  */
+/* Describes the ISO8601 format, if any, that a parsed string must match. */
+typedef struct {
+    const char *format;       /* format string being enforced */
+    int format_len;           /* length of format; 0 disables the checks */
+    const char *date_sep;     /* required year/month/day separator */
+    const char *time_sep;     /* required date/time separator */
+    const char *micro_or_tz;  /* format suffix after seconds, e.g. ".%f%z" */
+    int year;                 /* nonzero if format contains the year */
+    int month;                /* nonzero if format contains the month */
+    int day;                  /* nonzero if format contains the day */
+    int hour;                 /* nonzero if format contains the hour */
+    int minute;               /* nonzero if format contains the minute */
+    int second;               /* nonzero if format contains the second */
+    int exact;                /* nonzero: reject fields beyond the format */
+} ISOInfo;
+
 int
 parse_iso_8601_datetime(const char *str, int len, int want_exc,
                         npy_datetimestruct *out,
                         NPY_DATETIMEUNIT *out_bestunit,
                         int *out_local,
                         int *out_tzoffset,
-        int format_length,
-        const char *date_sep,
-        const char *time_sep,
-        const char *micro_or_tz,
-        int year,
-        int month,
-        int day,
-        int hour,
-        int minute,
-        int second,
-        int exact);
+                        ISOInfo *iso_info);
 
 /*
  * Provides a string length to use for converting datetime
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 61e8dcadf7dc9..ac76c80a43da1 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -42,6 +42,7 @@
     tz_convert_from_utc,
     tzconversion,
 )
+from pandas._libs.tslibs.parsing import null_iso_info
 from pandas._typing import (
     DateTimeErrorChoices,
     IntervalClosedType,
@@ -78,7 +79,6 @@
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna
 
-from f import ISO8601Info
 from pandas.core.arrays import datetimelike as dtl
 from pandas.core.arrays._ranges import generate_regular_range
 import pandas.core.common as com
@@ -2181,7 +2181,7 @@ def objects_to_datetime64ns(
     require_iso8601: bool = False,
     allow_object: bool = False,
     allow_mixed: bool = False,
-    iso_info=ISO8601Info(),
+    iso_info=None,
     exact: bool = False,
 ):
     """
@@ -2219,6 +2219,9 @@ def objects_to_datetime64ns(
     """
     assert errors in ["raise", "ignore", "coerce"]
 
+    if iso_info is None:
+        iso_info = null_iso_info()
+
     # if str-dtype, convert
     data = np.array(data, copy=False, dtype=np.object_)
 
@@ -2227,14 +2230,13 @@ def objects_to_datetime64ns(
     try:
         result, tz_parsed = tslib.array_to_datetime(
             data.ravel("K"),
+            iso_info=iso_info,
             errors=errors,
             utc=utc,
             dayfirst=dayfirst,
             yearfirst=yearfirst,
             require_iso8601=require_iso8601,
             allow_mixed=allow_mixed,
-            **iso_info._asdict(),
-            exact=exact,
         )
         result = result.reshape(data.shape, order=order)
     except OverflowError as err:
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 7607f637cabc6..3d177797eab33 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -34,7 +34,9 @@
 )
 from pandas._libs.tslibs.parsing import (  # format_is_iso,
     DateParseError,
+    format_is_iso,
     guess_datetime_format,
+    null_iso_info,
 )
 from pandas._libs.tslibs.strptime import array_strptime
 from pandas._typing import (
@@ -64,10 +66,6 @@
 )
 from pandas.core.dtypes.missing import notna
 
-from f import (
-    ISO8601Info,
-    format_is_iso,
-)
 from pandas.arrays import (
     DatetimeArray,
     IntegerArray,
@@ -432,12 +430,12 @@ def _convert_listlike_datetimes(
         format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
 
     if format is not None:
-        iso_info = format_is_iso(format)
+        iso_info = format_is_iso(format, exact=exact)
         require_iso8601 = True
     else:
-        iso_info = ISO8601Info()
+        iso_info = null_iso_info()
         require_iso8601 = False
-    if format is not None and not iso_info.format:
+    if format is not None and not iso_info["format"]:
         # There is a special fast-path for iso8601 formatted
         # datetime strings, so in those cases don't use the inferred
         # format because this path makes process slower in this

From 7ea1acbb9abe8c29830bf6ea6f063fdbc9579b8e Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Thu, 20 Oct 2022 15:16:05 +0100
Subject: [PATCH 12/12] remove f.py prototype, superseded by tslibs.parsing

---
 f.py | 92 ------------------------------------------------------------
 1 file changed, 92 deletions(-)
 delete mode 100644 f.py

diff --git a/f.py b/f.py
deleted file mode 100644
index e2e151a6271d9..0000000000000
--- a/f.py
+++ /dev/null
@@ -1,92 +0,0 @@
-from typing import NamedTuple
-
-
-class ISO8601Info(NamedTuple):
-    format: str = b""
-    date_sep: str = b""
-    time_sep: str = b""
-    micro_or_tz: str = b""
-    year: bool = False
-    month: bool = False
-    day: bool = False
-    hour: bool = False
-    minute: bool = False
-    second: bool = False
-
-
-def format_is_iso(f: str):
-    """
-    Does format match the iso8601 set that can be handled by the C parser?
-    Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different
-    but must be consistent.  Leading 0s in dates and times are optional.
-
-    no, needs doing in c. eff...
-    """
-    excluded_formats = ["%Y%m%d", "%Y%m", "%Y"]
-
-    if f in excluded_formats:
-        return ISO8601Info()
-    for date_sep in [" ", "/", "\\", "-", ".", ""]:
-        for time_sep in [" ", "T"]:
-            for micro_or_tz in ["", "%z", "%Z", ".%f", ".%f%z", ".%f%Z"]:
-                if f"%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}" == f:
-                    return ISO8601Info(
-                        format=f.encode("utf-8"),
-                        date_sep=date_sep.encode("utf-8"),
-                        time_sep=time_sep.encode("utf-8"),
-                        micro_or_tz=micro_or_tz.encode("utf-8"),
-                        year=True,
-                        month=True,
-                        day=True,
-                        hour=True,
-                        minute=True,
-                        second=True,
-                    )
-                elif f"%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M" == f:
-                    return ISO8601Info(
-                        format=f.encode("utf-8"),
-                        date_sep=date_sep.encode("utf-8"),
-                        time_sep=time_sep.encode("utf-8"),
-                        micro_or_tz=micro_or_tz.encode("utf-8"),
-                        year=True,
-                        month=True,
-                        day=True,
-                        hour=True,
-                        minute=True,
-                    )
-                elif f"%Y{date_sep}%m{date_sep}%d{time_sep}%H" == f:
-                    return ISO8601Info(
-                        format=f.encode("utf-8"),
-                        date_sep=date_sep.encode("utf-8"),
-                        time_sep=time_sep.encode("utf-8"),
-                        micro_or_tz=micro_or_tz.encode("utf-8"),
-                        year=True,
-                        month=True,
-                        day=True,
-                        hour=True,
-                    )
-                elif f"%Y{date_sep}%m{date_sep}%d" == f:
-                    return ISO8601Info(
-                        format=f.encode("utf-8"),
-                        date_sep=date_sep.encode("utf-8"),
-                        time_sep=time_sep.encode("utf-8"),
-                        micro_or_tz=micro_or_tz.encode("utf-8"),
-                        year=True,
-                        month=True,
-                        day=True,
-                    )
-                elif f"%Y{date_sep}%m" == f:
-                    return ISO8601Info(
-                        format=f.encode("utf-8"),
-                        date_sep=date_sep.encode("utf-8"),
-                        time_sep=time_sep.encode("utf-8"),
-                        micro_or_tz=micro_or_tz.encode("utf-8"),
-                        year=True,
-                        month=True,
-                    )
-    return ISO8601Info()
-
-
-if __name__ == "__main__":
-    print(format_is_iso("%Y-%m-%d %H:%M:%S%z"))
-# print(format_is_iso('%Y%m%d %H'))