pandas-dev · jreback · Apr 20, 2021 · Apr 17, 2021 · Apr 18, 2021 · Apr 19, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -813,6 +813,7 @@ Reshaping
 - Bug in :meth:`DataFrame.stack` not preserving ``CategoricalDtype`` in a ``MultiIndex`` (:issue:`36991`)
 - Bug in :func:`to_datetime` raising error when input sequence contains unhashable items (:issue:`39756`)
 - Bug in :meth:`Series.explode` preserving index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`)
+- Bug in :func:`to_datetime` raising a ``ValueError`` when the input :class:`Series` contains both ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -84,6 +84,7 @@
 Scalar = Union[int, float, str]
 DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
 DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
+start_caching_at = 50  # default should_cache cutoff: inputs this short are not cached
 
 
 # ---------------------------------------------------------------------
@@ -130,7 +131,7 @@ def should_cache(
     # default realization
     if check_count is None:
         # in this case, the gain from caching is negligible
-        if len(arg) <= 50:
+        if len(arg) <= start_caching_at:
             return False
 
         if len(arg) <= 5000:
@@ -193,6 +194,9 @@ def _maybe_cache(
         if len(unique_dates) < len(arg):
             cache_dates = convert_listlike(unique_dates, format)
             cache_array = Series(cache_dates, index=unique_dates)
+            if not cache_array.index.is_unique:
+                # GH#39882 None and NaT yield duplicate index keys; keep the first
+                cache_array = cache_array[~cache_array.index.duplicated()]
     return cache_array
 
 

diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -13,6 +13,8 @@
 # for most cases), and the specific cases where the result deviates from
 # this default. Those overrides are defined as a dict with (keyword, val) as
 # dictionary key. In case of multiple items, the last override takes precedence.
+from pandas.core.tools.datetimes import start_caching_at
+
 test_cases = [
     (
         # data
@@ -225,3 +227,16 @@ def test_convert_bool_dtype(self):
         # GH32287
         df = pd.DataFrame({"A": pd.array([True])})
         tm.assert_frame_equal(df, df.convert_dtypes())
+
+    def test_convert_object_to_datetime_with_cache(self):
+        # GH#39882 None and NaT mixed in a Series longer than start_caching_at
+        ser = pd.Series(
+            [None] + [pd.NaT] * start_caching_at + [pd.Timestamp("2012-07-26")],
+            dtype="object",
+        )
+        result = pd.to_datetime(ser, errors="coerce")
+        expected = pd.Series(
+            [pd.NaT] * (start_caching_at + 1) + [pd.Timestamp("2012-07-26")],
+            dtype="datetime64[ns]",
+        )
+        tm.assert_series_equal(result, expected)