From 0d320e9ebe921f724f1e076c7f864badc91c3b67 Mon Sep 17 00:00:00 2001
From: Steven Rotondo <steven.rotondo75@gmail.com>
Date: Fri, 15 Jul 2022 11:16:28 -0700
Subject: [PATCH 1/7] BUG: Fixed behavior with fallback between raise and
 coerce #46071

---
 pandas/core/tools/datetimes.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index d4d61df915acb..8c4d0ce24c5e6 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -215,6 +215,7 @@ def _maybe_cache(
     cache_array : Series
         Cache of converted, unique dates. Can be empty
     """
+
     from pandas import Series
 
     cache_array = Series(dtype=object)
@@ -391,7 +392,6 @@ def _convert_listlike_datetimes(
         raise TypeError(
             "arg must be a string, datetime, list, tuple, 1-d array, or Series"
         )
-
     # warn if passing timedelta64, raise for PeriodDtype
     # NB: this must come after unit transformation
     orig_arg = arg
@@ -411,7 +411,6 @@ def _convert_listlike_datetimes(
 
     if infer_datetime_format and format is None:
         format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
-
     if format is not None:
         # There is a special fast-path for iso8601 formatted
         # datetime strings, so in those cases don't use the inferred
@@ -428,7 +427,6 @@ def _convert_listlike_datetimes(
         )
         if res is not None:
             return res
-
     assert format is None or infer_datetime_format
     utc = tz == "utc"
     result, tz_parsed = objects_to_datetime64ns(
@@ -440,7 +438,6 @@ def _convert_listlike_datetimes(
         require_iso8601=require_iso8601,
         allow_object=True,
     )
-
     if tz_parsed is not None:
         # We can take a shortcut since the datetime64 numpy array
         # is in UTC
@@ -495,6 +492,8 @@ def _array_strptime_with_fallback(
     else:
         if "%Z" in fmt or "%z" in fmt:
             return _return_parsed_timezone_results(result, timezones, tz, name)
+        if infer_datetime_format and np.isnan(result).any():
+            return None
 
     return _box_as_indexlike(result, utc=utc, name=name)
 
@@ -513,7 +512,6 @@ def _to_datetime_with_format(
     Try parsing with the given format, returning None on failure.
     """
     result = None
-
     # shortcut formatting here
     if fmt == "%Y%m%d":
         # pass orig_arg as float-dtype may have been converted to
@@ -1029,6 +1027,7 @@ def to_datetime(
                    '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
                   dtype='datetime64[ns, UTC]', freq=None)
     """
+
     if arg is None:
         return None
 

From df08fd13038f98cd7f5636c9aade02891dc4c959 Mon Sep 17 00:00:00 2001
From: Steven Rotondo <steven.rotondo75@gmail.com>
Date: Fri, 15 Jul 2022 11:22:29 -0700
Subject: [PATCH 2/7] BUG: Fixed behavior with fallback between raise and
 coerce #46071

---
 pandas/core/tools/datetimes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 8c4d0ce24c5e6..7909a7bfc500c 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -492,6 +492,7 @@ def _array_strptime_with_fallback(
     else:
         if "%Z" in fmt or "%z" in fmt:
             return _return_parsed_timezone_results(result, timezones, tz, name)
+        # GH#46071
         if infer_datetime_format and np.isnan(result).any():
             return None
 

From 2ab558a00b04f310e9b22aa0a0adba6d1e9597a3 Mon Sep 17 00:00:00 2001
From: Steven Rotondo <steven.rotondo75@gmail.com>
Date: Fri, 22 Jul 2022 16:44:59 -0700
Subject: [PATCH 3/7] BUG: Added test and release note and removed line shifts
 #46071

---
 doc/source/whatsnew/v1.5.0.rst        |  2 +-
 pandas/core/tools/datetimes.py        |  9 ++++++---
 pandas/tests/arrays/test_datetimes.py | 18 ++++++++++++++++++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 16fcb34fdb7d1..acee25a76f9f6 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -827,7 +827,7 @@ Datetimelike
 - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
 - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`)
 - Bug in :class:`.DatetimeArray` construction when passed another :class:`.DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`)
--
+- Bug in :func:`to_datetime` where the ``infer_datetime_format`` fallback would not run when ``errors="coerce"`` (:issue:`46071`)
 
 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 1156f4e4d3254..782c51a979945 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -215,7 +215,6 @@ def _maybe_cache(
     cache_array : Series
         Cache of converted, unique dates. Can be empty
     """
-
     from pandas import Series
 
     cache_array = Series(dtype=object)
@@ -392,6 +391,7 @@ def _convert_listlike_datetimes(
         raise TypeError(
             "arg must be a string, datetime, list, tuple, 1-d array, or Series"
         )
+
     # warn if passing timedelta64, raise for PeriodDtype
     # NB: this must come after unit transformation
     orig_arg = arg
@@ -411,6 +411,7 @@ def _convert_listlike_datetimes(
 
     if infer_datetime_format and format is None:
         format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
+
     if format is not None:
         # There is a special fast-path for iso8601 formatted
         # datetime strings, so in those cases don't use the inferred
@@ -427,6 +428,7 @@ def _convert_listlike_datetimes(
         )
         if res is not None:
             return res
+
     assert format is None or infer_datetime_format
     utc = tz == "utc"
     result, tz_parsed = objects_to_datetime64ns(
@@ -438,6 +440,7 @@ def _convert_listlike_datetimes(
         require_iso8601=require_iso8601,
         allow_object=True,
     )
+
     if tz_parsed is not None:
         # We can take a shortcut since the datetime64 numpy array
         # is in UTC
@@ -492,7 +495,7 @@ def _array_strptime_with_fallback(
     else:
         if "%Z" in fmt or "%z" in fmt:
             return _return_parsed_timezone_results(result, timezones, tz, name)
-        # GH#46071
+
         if infer_datetime_format and np.isnan(result).any():
             return None
 
@@ -513,6 +516,7 @@ def _to_datetime_with_format(
     Try parsing with the given format, returning None on failure.
     """
     result = None
+
     # shortcut formatting here
     if fmt == "%Y%m%d":
         # pass orig_arg as float-dtype may have been converted to
@@ -1028,7 +1032,6 @@ def to_datetime(
                    '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
                   dtype='datetime64[ns, UTC]', freq=None)
     """
-
     if arg is None:
         return None
 
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index af1a292a2975a..cff19e04842f8 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -639,3 +639,21 @@ def test_tz_localize_t2d(self):
 
         roundtrip = expected.tz_localize("US/Pacific")
         tm.assert_datetime_array_equal(roundtrip, dta)
+
+    @pytest.mark.parametrize(
+        "error",
+        ["coerce", "raise"],
+    )
+    def test_coerce_fallback(self, error):
+        # GH#46071
+        s = pd.Series(["6/30/2025", "1 27 2024"])
+        expected = pd.Series(
+            [pd.Timestamp("2025-06-30 00:00:00"), pd.Timestamp("2024-01-27 00:00:00")]
+        )
+
+        result = pd.to_datetime(s, errors=error, infer_datetime_format=True)
+
+        if error == "coerce":
+            assert result[1] is not pd.NaT
+
+        tm.assert_series_equal(expected, result)

From 78e6bc770c11542019f08978395db2764eedcabb Mon Sep 17 00:00:00 2001
From: Steven Rotondo <steven.rotondo75@gmail.com>
Date: Thu, 11 Aug 2022 09:22:06 -0700
Subject: [PATCH 4/7] BUG: Edited documentation and improved tests #46071

---
 pandas/core/tools/datetimes.py        |  5 ++++-
 pandas/tests/arrays/test_datetimes.py | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 782c51a979945..2a23fa98a2506 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -796,7 +796,10 @@ def to_datetime(
         If :const:`True` and no `format` is given, attempt to infer the format
         of the datetime strings based on the first non-NaN element,
         and if it can be inferred, switch to a faster method of parsing them.
-        In some cases this can increase the parsing speed by ~5-10x.
+        In some cases this can increase the parsing speed by ~5-10x. If subsequent
+        datetime strings do not follow the inferred format, parsing will fall
+        back to the slower method of determining the format for each
+        string individually.
     origin : scalar, default 'unix'
         Define the reference date. The numeric values would be parsed as number
         of units (defined by `unit`) since this reference date.
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index cff19e04842f8..556fc1ee6e1b1 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -3,11 +3,13 @@
 """
 import operator
 
+from dateutil.parser._parser import ParserError
 import numpy as np
 import pytest
 
 from pandas._libs.tslibs import tz_compare
 from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
+from pandas.errors import OutOfBoundsDatetime
 
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 
@@ -657,3 +659,22 @@ def test_coerce_fallback(self, error):
             assert result[1] is not pd.NaT
 
         tm.assert_series_equal(expected, result)
+
+        expected2 = pd.Series([pd.Timestamp("2000-01-01 00:00:00"), pd.NaT])
+
+        es1 = pd.Series(["1/1/2000", "7/12/1200"])
+        es2 = pd.Series(["1/1/2000", "Hello"])
+
+        if error == "coerce":
+            eres1 = pd.to_datetime(es1, errors=error, infer_datetime_format=True)
+            eres2 = pd.to_datetime(es2, errors=error, infer_datetime_format=True)
+            tm.assert_series_equal(expected2, eres1)
+            tm.assert_series_equal(expected2, eres2)
+        else:
+            with pytest.raises(
+                OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"
+            ):
+                pd.to_datetime(es1, errors=error, infer_datetime_format=True)
+
+            with pytest.raises(ParserError, match="Unknown string format: Hello"):
+                pd.to_datetime(es2, errors=error, infer_datetime_format=True)

From ef1f73692d3dfb6f6f92adfc431a1c82ce470ab8 Mon Sep 17 00:00:00 2001
From: Steven Rotondo <steven.rotondo75@gmail.com>
Date: Wed, 17 Aug 2022 14:42:46 -0700
Subject: [PATCH 5/7] BUG: Edited test structure #46071

---
 pandas/tests/arrays/test_datetimes.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 556fc1ee6e1b1..66780265324dc 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -648,27 +648,26 @@ def test_tz_localize_t2d(self):
     )
     def test_coerce_fallback(self, error):
         # GH#46071
+        # 2 valid dates with different formats
+        # Should parse with no errors
         s = pd.Series(["6/30/2025", "1 27 2024"])
         expected = pd.Series(
             [pd.Timestamp("2025-06-30 00:00:00"), pd.Timestamp("2024-01-27 00:00:00")]
         )
-
         result = pd.to_datetime(s, errors=error, infer_datetime_format=True)
-
-        if error == "coerce":
-            assert result[1] is not pd.NaT
-
         tm.assert_series_equal(expected, result)
 
+        # Invalid inputs
+        # Errors should be raised for the second element
         expected2 = pd.Series([pd.Timestamp("2000-01-01 00:00:00"), pd.NaT])
-
+        # Out of bounds date
         es1 = pd.Series(["1/1/2000", "7/12/1200"])
-        es2 = pd.Series(["1/1/2000", "Hello"])
-
+        # Invalid input string
+        es2 = pd.Series(["1/1/2000", "Invalid input"])
         if error == "coerce":
             eres1 = pd.to_datetime(es1, errors=error, infer_datetime_format=True)
-            eres2 = pd.to_datetime(es2, errors=error, infer_datetime_format=True)
             tm.assert_series_equal(expected2, eres1)
+            eres2 = pd.to_datetime(es2, errors=error, infer_datetime_format=True)
             tm.assert_series_equal(expected2, eres2)
         else:
             with pytest.raises(
@@ -676,5 +675,7 @@ def test_coerce_fallback(self, error):
             ):
                 pd.to_datetime(es1, errors=error, infer_datetime_format=True)
 
-            with pytest.raises(ParserError, match="Unknown string format: Hello"):
+            with pytest.raises(
+                ParserError, match="Unknown string format: Invalid input"
+            ):
                 pd.to_datetime(es2, errors=error, infer_datetime_format=True)

From 7347c780b8b79175be9d173d9510e3f25a29ef6e Mon Sep 17 00:00:00 2001
From: Steven Rotondo <steven.rotondo75@gmail.com>
Date: Fri, 2 Sep 2022 11:07:35 -0700
Subject: [PATCH 6/7] BUG: Split up tests #46071

---
 pandas/tests/arrays/test_datetimes.py | 48 +++++++++++++++------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index 66780265324dc..68ffff3fc93f5 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -646,7 +646,7 @@ def test_tz_localize_t2d(self):
         "error",
         ["coerce", "raise"],
     )
-    def test_coerce_fallback(self, error):
+    def test_fallback_different_formats(self, error):
         # GH#46071
         # 2 valid dates with different formats
         # Should parse with no errors
@@ -657,25 +657,31 @@ def test_coerce_fallback(self, error):
         result = pd.to_datetime(s, errors=error, infer_datetime_format=True)
         tm.assert_series_equal(expected, result)
 
+    @pytest.mark.parametrize(
+        "dateseries",
+        [
+            pd.Series(["1/1/2000", "7/12/1200"]),
+            pd.Series(["1/1/2000", "Invalid input"]),
+        ],
+    )
+    def test_fallback_with_errors_coerce(self, dateseries):
+        # GH#46071
         # Invalid inputs
-        # Errors should be raised for the second element
-        expected2 = pd.Series([pd.Timestamp("2000-01-01 00:00:00"), pd.NaT])
-        # Out of bounds date
-        es1 = pd.Series(["1/1/2000", "7/12/1200"])
-        # Invalid input string
-        es2 = pd.Series(["1/1/2000", "Invalid input"])
-        if error == "coerce":
-            eres1 = pd.to_datetime(es1, errors=error, infer_datetime_format=True)
-            tm.assert_series_equal(expected2, eres1)
-            eres2 = pd.to_datetime(es2, errors=error, infer_datetime_format=True)
-            tm.assert_series_equal(expected2, eres2)
-        else:
-            with pytest.raises(
-                OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"
-            ):
-                pd.to_datetime(es1, errors=error, infer_datetime_format=True)
+        # Parsing should fail for the second element
+        expected = pd.Series([pd.Timestamp("2000-01-01 00:00:00"), pd.NaT])
+        result = pd.to_datetime(dateseries, errors="coerce", infer_datetime_format=True)
+        tm.assert_series_equal(expected, result)
 
-            with pytest.raises(
-                ParserError, match="Unknown string format: Invalid input"
-            ):
-                pd.to_datetime(es2, errors=error, infer_datetime_format=True)
+    def test_fallback_with_errors_raise(self):
+        # GH#46071
+        # Invalid inputs
+        # Parsing should fail for the second element
+        dates1 = pd.Series(["1/1/2000", "7/12/1200"])
+        with pytest.raises(
+            OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"
+        ):
+            pd.to_datetime(dates1, errors="raise", infer_datetime_format=True)
+
+        dates2 = pd.Series(["1/1/2000", "Invalid input"])
+        with pytest.raises(ParserError, match="Unknown string format: Invalid input"):
+            pd.to_datetime(dates2, errors="raise", infer_datetime_format=True)

From 3b08c718f2ba062417ae39710e23dab7bb7b00eb Mon Sep 17 00:00:00 2001
From: Steven Rotondo <97266896+srotondo@users.noreply.github.com>
Date: Sun, 18 Sep 2022 14:06:28 -0700
Subject: [PATCH 7/7] Update pandas/core/tools/datetimes.py

Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
---
 pandas/core/tools/datetimes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 6417db7b3974d..0063df68c595b 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -502,6 +502,7 @@ def _array_strptime_with_fallback(
             return _return_parsed_timezone_results(result, timezones, tz, name)
 
         if infer_datetime_format and np.isnan(result).any():
+            # Signals the caller to fall back to objects_to_datetime64ns
             return None
 
     return _box_as_indexlike(result, utc=utc, name=name)