Skip to content

BUG/API: Series(ints, dtype=m8[s]) should use unit=s #52476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,6 @@ def array_to_timedelta64(
object item
int64_t ival
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
cnp.flatiter it

if values.descr.type_num != cnp.NPY_OBJECT:
# raise here otherwise we segfault below
Expand All @@ -421,17 +420,6 @@ def array_to_timedelta64(
if errors not in {"ignore", "raise", "coerce"}:
raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")

if unit is not None and errors != "coerce":
it = cnp.PyArray_IterNew(values)
for i in range(n):
# Analogous to: item = values[i]
item = cnp.PyArray_GETITEM(values, cnp.PyArray_ITER_DATA(it))
if isinstance(item, str):
raise ValueError(
"unit must not be specified if the input contains a str"
)
cnp.PyArray_ITER_NEXT(it)

# Usually, we have all strings. If so, we hit the fast path.
# If this path fails, we try conversion a different way, and
# this is where all of the error handling will take place.
Expand Down Expand Up @@ -1847,8 +1835,12 @@ class Timedelta(_Timedelta):

from pandas._libs.tslibs.offsets import to_offset

to_offset(freq).nanos # raises on non-fixed freq
unit = delta_to_nanoseconds(to_offset(freq), self._creso)
freq = to_offset(freq)
freq.nanos # raises on non-fixed freq
unit = delta_to_nanoseconds(freq, self._creso)
if unit == 0 and freq.nanos != 0:
# e.g. we have unit="s" and freq="ms"
return self

arr = np.array([self._value], dtype="i8")
try:
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,13 @@ def _validate_listlike(self, value, allow_object: bool = False):
# TODO: do we need equal dtype or just comparable?
value = value._internal_get_values()
value = extract_array(value, extract_numpy=True)
elif self.dtype.kind == "m" and value.categories.dtype.kind == "m":
# e.g. Categorical[timedelta64[ns]] and we are timedelta64[s]
value = value._internal_get_values()
value = extract_array(value, extract_numpy=True)
value = value.as_unit(self.unit)
# TODO: for e.g. searchsorted should we be able to do this
# without cast?

if allow_object and is_object_dtype(value.dtype):
pass
Expand Down Expand Up @@ -1981,8 +1988,12 @@ def _round(self, freq, mode, ambiguous, nonexistent):

values = self.view("i8")
values = cast(np.ndarray, values)
nanos = to_offset(freq).nanos # raises on non-fixed frequencies
nanos = delta_to_nanoseconds(to_offset(freq), self._creso)
freq = to_offset(freq)
freq.nanos # raises on non-fixed frequencies
nanos = delta_to_nanoseconds(freq, self._creso)
if freq.nanos != 0 and nanos == 0:
# e.g. we have unit="s" and freq="ms"
return self.copy()
result_i8 = round_nsint64(values, mode, nanos)
result = self._maybe_mask_results(result_i8, fill_value=iNaT)
result = result.view(self._ndarray.dtype)
Expand Down
16 changes: 13 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ def _simple_new( # type: ignore[override]
def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
if dtype:
dtype = _validate_td64_dtype(dtype)
np.datetime_data(dtype)[0]
else:
pass

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
Expand Down Expand Up @@ -242,6 +245,8 @@ def _from_sequence_not_strict(
dtype = _validate_td64_dtype(dtype)

assert unit not in ["Y", "y", "M"] # caller is responsible for checking
if unit is None and dtype is not None:
unit = np.datetime_data(dtype)[0]

explicit_none = freq is None
freq = freq if freq is not lib.no_default else None
Expand Down Expand Up @@ -991,10 +996,15 @@ def _ints_to_td64ns(data, unit: str = "ns"):
dtype_str = f"timedelta64[{unit}]"
data = data.view(dtype_str)

data = astype_overflowsafe(data, dtype=TD64NS_DTYPE)
data_unit = get_unit_from_dtype(data.dtype)
if not is_supported_unit(data_unit):
new_reso = get_supported_reso(data_unit)
new_unit = npy_unit_to_abbrev(new_reso)
new_dtype = np.dtype(f"m8[{new_unit}]")
data = astype_overflowsafe(data, dtype=new_dtype)

# the astype conversion makes a copy, so we can avoid re-copying later
copy_made = True
# the astype conversion makes a copy, so we can avoid re-copying later
copy_made = True

else:
data = data.view("timedelta64[ns]")
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,12 +502,19 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
else:
arg = np.asarray(arg)

if unit in ["ns", "us", "ms", "s"]:
out_unit = unit
else:
# closest supported unit is seconds
out_unit = "s"

if arg.dtype.kind in "iu":
# Note we can't do "f" here because that could induce unwanted
# rounding GH#14156, GH#20445
arr = arg.astype(f"datetime64[{unit}]", copy=False)
out_dtype = np.dtype(f"M8[{out_unit}]")
try:
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
arr = astype_overflowsafe(arr, out_dtype, copy=False)
except OutOfBoundsDatetime:
if errors == "raise":
raise
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,9 @@ def _try_convert_to_date(self, data):
date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
for date_unit in date_units:
try:
new_data = to_datetime(new_data, errors="raise", unit=date_unit)
converted = to_datetime(new_data, errors="raise", unit=date_unit)
# make sure we are within ns bounds (largely for backward compat)
new_data = Series(converted).dt.as_unit("ns")
except (ValueError, OverflowError, TypeError):
continue
return new_data, True
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_apply_mixed_datetimelike():
expected = DataFrame(
{
"A": date_range("20130101", periods=3),
"B": pd.to_timedelta(np.arange(3), unit="s"),
"B": pd.to_timedelta(np.arange(3), unit="s").astype("m8[ns]"),
}
)
result = expected.apply(lambda x: x, axis=1)
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,10 @@ def test_groupby_quantile_allNA_column(dtype):
def test_groupby_timedelta_quantile():
# GH: 29485
df = DataFrame(
{"value": pd.to_timedelta(np.arange(4), unit="s"), "group": [1, 1, 2, 2]}
{
"value": pd.to_timedelta(np.arange(4), unit="s").astype("m8[ns]"),
"group": [1, 1, 2, 2],
}
)
result = df.groupby("group").quantile(0.99)
expected = DataFrame(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,7 @@ def test_value_counts_time_grouper(utc):
result = gb.value_counts()
dates = to_datetime(
["2019-08-06", "2019-08-07", "2019-08-09", "2019-08-10"], utc=utc
)
).as_unit("s")
timestamps = df["Timestamp"].unique()
index = MultiIndex(
levels=[dates, timestamps, ["apple", "banana", "orange", "pear"]],
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/indexes/timedeltas/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,19 @@ def test_constructor(self):

expected = TimedeltaIndex(
["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"]
)
tm.assert_index_equal(TimedeltaIndex(range(3), unit="s"), expected)
).astype("m8[s]")
result = TimedeltaIndex(range(3), unit="s")
tm.assert_index_equal(result, expected)
expected = TimedeltaIndex(
["0 days 00:00:00", "0 days 00:00:05", "0 days 00:00:09"]
)
tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit="s"), expected)
).astype("m8[s]")
result = TimedeltaIndex([0, 5, 9], unit="s")
tm.assert_index_equal(result, expected)
expected = TimedeltaIndex(
["0 days 00:00:00.400", "0 days 00:00:00.450", "0 days 00:00:01.200"]
)
tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit="ms"), expected)
).astype("m8[ms]")
result = TimedeltaIndex([400, 450, 1200], unit="ms")
tm.assert_index_equal(result, expected)

def test_constructor_iso(self):
# GH #21877
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/timedeltas/test_scalar_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_round(self):
t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us")
t2 = -1 * t1
t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s")
t1c = TimedeltaIndex([1, 1, 1], unit="D")
t1c = TimedeltaIndex([1, 1, 1], unit="D").astype("m8[ns]")

# note that negative times round DOWN! so don't give whole numbers
for freq, s1, s2 in [
Expand All @@ -122,7 +122,7 @@ def test_round(self):
),
("12T", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])),
("H", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])),
("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")),
("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D").astype("m8[ns]")),
]:
r1 = t1.round(freq)
tm.assert_index_equal(r1, s1)
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/indexes/timedeltas/test_timedelta_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,23 @@ def test_timedelta_range_unit(self):
def test_timedelta_range(self):
expected = to_timedelta(np.arange(5), unit="D")
result = timedelta_range("0 days", periods=5, freq="D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta(np.arange(11), unit="D")
result = timedelta_range("0 days", "10 days", freq="D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day()
result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2)
result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta(np.arange(50), unit="T") * 30
result = timedelta_range("0 days", freq="30T", periods=50)
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

@pytest.mark.parametrize(
"periods, freq", [(3, "2D"), (5, "D"), (6, "19H12T"), (7, "16H"), (9, "12H")]
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,9 @@ def test_loc_modify_datetime(self):
)

columns = ["date_dt", "date_dt_cp"]
expected[columns] = expected[columns].apply(to_datetime)
expected[columns] = expected[columns].apply(
lambda x: to_datetime(x).dt.as_unit("ms")
)

tm.assert_frame_equal(df, expected)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,7 +974,8 @@ def test_timedelta(self):

frame = DataFrame([timedelta(23), timedelta(seconds=5)])
assert frame[0].dtype == "timedelta64[ns]"
tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter))
result = read_json(frame.to_json()).apply(converter)
tm.assert_frame_equal(frame.astype("m8[ms]"), result)

def test_timedelta2(self):
frame = DataFrame(
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __custom_date_parser(time):
)
time = [41047, 41048, 41049, 41050, 41051]
time = pd.TimedeltaIndex([pd.to_timedelta(i, unit="s") for i in time], name="time")
time = time.astype("m8[s]")
expected = DataFrame(
{
"e": [-98573.7297, -98573.7299, -98573.7300, -98573.7299, -98573.7302],
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/reductions/test_stat_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ def test_period_mean(self, box, freq):

@pytest.mark.parametrize("box", [Series, pd.Index, TimedeltaArray])
def test_td64_mean(self, box):
tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D")
tdi = pd.TimedeltaIndex(
[0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D"
).as_unit("ns")

tdarr = tdi._data
obj = box(tdarr, copy=False)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_resample_categorical_data_with_timedeltaindex():
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
expected = DataFrame(
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s"),
index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s").astype("m8[ns]"),
)
expected = expected.reindex(["Group_obj", "Group"], axis=1)
expected["Group"] = expected["Group_obj"]
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,13 @@ def test_timedelta_pass_td_and_kwargs_raises():
[
(Timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
(to_timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
(to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must not be specified")),
pytest.param(
to_timedelta,
["1", 2, 3],
"s",
(ValueError, "unit must not be specified"),
marks=pytest.mark.xfail(reason="Reconsidering API"),
),
],
)
def test_string_with_unit(constructor, value, unit, expectation):
Expand Down
19 changes: 14 additions & 5 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,21 +568,30 @@ def test_nat_converters(self):
def test_unit_parser(self, unit, np_unit, wrapper):
# validate all units, GH 6855, GH 21762
# array-likes
if np_unit in ["s", "ms", "us", "ns"]:
# Supported unit, we retain
pd_unit = np_unit
else:
# closest supported unit
pd_unit = "s"
expected = TimedeltaIndex(
[np.timedelta64(i, np_unit) for i in np.arange(5).tolist()],
dtype="m8[ns]",
dtype=f"m8[{pd_unit}]",
)
# TODO(2.0): the desired output dtype may have non-nano resolution
result = to_timedelta(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)
if wrapper is list:
# TODO: should not depend on this -> need inference in array_to_timedelta64
tm.assert_index_equal(result, expected.astype("m8[ns]"))
else:
tm.assert_index_equal(result, expected)
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)

str_repr = [f"{x}{unit}" for x in np.arange(5)]
result = to_timedelta(wrapper(str_repr))
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]", copy=False))
result = to_timedelta(wrapper(str_repr))
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]", copy=False))

# scalar
expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]"))
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,6 +1930,8 @@ def test_to_datetime_unit(self, dtype):
expected = Series(
[Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
)
if dtype is int:
expected = expected.dt.as_unit("s")
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("null", [iNaT, np.nan])
Expand All @@ -1941,6 +1943,8 @@ def test_to_datetime_unit_with_nulls(self, null):
[Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
+ [NaT]
)
if null is iNaT:
expected = expected.dt.as_unit("s")
tm.assert_series_equal(result, expected)

def test_to_datetime_unit_fractional_seconds(self):
Expand Down Expand Up @@ -3249,8 +3253,10 @@ def test_invalid_origin(self, unit):
to_datetime("2005-01-01", origin="1960-01-01", unit=unit)

def test_epoch(self, units, epochs, epoch_1960, units_from_epochs):
exp_unit = units if units in ["ns", "us", "ms", "s"] else "s"
expected = Series(
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs],
dtype=f"M8[{exp_unit}]",
)

result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs))
Expand Down
Loading