pandas-dev · jreback · Feb 1, 2020 · Jan 31, 2020 · TomAugspurger · Jan 31, 2020
diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst
@@ -25,8 +25,8 @@ Categorical
 
 Datetimelike
 ^^^^^^^^^^^^
--
--
+- Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`)
+- Fixed bug in :meth:`to_datetime` raising ``OutOfBoundsDatetime`` when ``cache=True`` and out-of-bounds values are present, even if ``errors`` was not ``"raise"`` (:issue:`31491`)
 
 Timedelta
 ^^^^^^^^^

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -29,6 +29,7 @@
     is_categorical_dtype,
     is_complex_dtype,
     is_datetime64_any_dtype,
+    is_datetime64_dtype,
     is_datetime64_ns_dtype,
     is_extension_array_dtype,
     is_float_dtype,
@@ -191,6 +192,11 @@ def _reconstruct_data(values, dtype, original):
         if isinstance(original, ABCIndexClass):
             values = values.astype(object, copy=False)
     elif dtype is not None:
+        if is_datetime64_dtype(dtype):
+            dtype = "datetime64[ns]"
+        elif is_timedelta64_dtype(dtype):
+            dtype = "timedelta64[ns]"
+
         values = values.astype(dtype, copy=False)
 
     return values

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -602,7 +602,9 @@ def to_datetime(
     cache : bool, default True
         If True, use a cache of unique, converted dates to apply the datetime
         conversion. May produce significant speed-up when parsing duplicate
-        date strings, especially ones with timezone offsets.
+        date strings, especially ones with timezone offsets. The cache is only
+        used when there are at least 50 values. The presence of out-of-bounds
+        values will render the cache unusable and may slow down parsing.
 
         .. versionadded:: 0.23.0
 
@@ -734,7 +736,17 @@ def to_datetime(
             convert_listlike = partial(convert_listlike, name=arg.name)
             result = convert_listlike(arg, format)
     elif is_list_like(arg):
-        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
+        try:
+            cache_array = _maybe_cache(arg, format, cache, convert_listlike)
+        except tslibs.OutOfBoundsDatetime:
+            # caching attempts to create a DatetimeIndex, which may raise
+            # OutOfBoundsDatetime. If errors="raise", just re-raise it...
+            if errors == "raise":
+                raise
+            # ... otherwise, continue without the cache.
+            from pandas import Series
+
+            cache_array = Series([], dtype=object)  # just an empty array
         if not cache_array.empty:
             result = _convert_and_box_cache(arg, cache_array)
         else:

diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
@@ -559,9 +559,14 @@ def test_to_datetime_dt64s_out_of_bounds(self, cache, dt):
         assert pd.to_datetime(dt, errors="coerce", cache=cache) is NaT
 
     @pytest.mark.parametrize("cache", [True, False])
-    def test_to_datetime_array_of_dt64s(self, cache):
-        dts = [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")]
-
+    @pytest.mark.parametrize("unit", ["s", "D"])
+    def test_to_datetime_array_of_dt64s(self, cache, unit):
+        # https://github.com/pandas-dev/pandas/issues/31491
+        # Need at least 50 values to ensure the cache is used.
+        dts = [
+            np.datetime64("2000-01-01", unit),
+            np.datetime64("2000-01-02", unit),
+        ] * 30
         # Assuming all datetimes are in bounds, to_datetime() returns
         # an array that is equal to Timestamp() parsing
         tm.assert_index_equal(
@@ -579,11 +584,8 @@ def test_to_datetime_array_of_dt64s(self, cache):
         tm.assert_index_equal(
             pd.to_datetime(dts_with_oob, errors="coerce", cache=cache),
             pd.DatetimeIndex(
-                [
-                    Timestamp(dts_with_oob[0]).asm8,
-                    Timestamp(dts_with_oob[1]).asm8,
-                    pd.NaT,
-                ]
+                [Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8] * 30
+                + [pd.NaT],
             ),
         )
 

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
@@ -420,6 +420,18 @@ def test_datetime64_dtype_array_returned(self):
         tm.assert_numpy_array_equal(result, expected)
         assert result.dtype == expected.dtype
 
+    def test_datetime_non_ns(self):
+        a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]")
+        result = pd.unique(a)
+        expected = np.array(["2000", "2001"], dtype="datetime64[ns]")
+        tm.assert_numpy_array_equal(result, expected)
+
+    def test_timedelta_non_ns(self):
+        a = np.array(["2000", "2000", "2001"], dtype="timedelta64[s]")
+        result = pd.unique(a)
+        expected = np.array([2000000000000, 2001000000000], dtype="timedelta64[ns]")
+        tm.assert_numpy_array_equal(result, expected)
+
     def test_timedelta64_dtype_array_returned(self):
         # GH 9431
         expected = np.array([31200, 45678, 10000], dtype="m8[ns]")