Skip to content

BUG/API: Series(ints, dtype=m8[s]) should use unit=s #52476

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,6 @@ def array_to_timedelta64(
object item
int64_t ival
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
cnp.flatiter it

if values.descr.type_num != cnp.NPY_OBJECT:
# raise here otherwise we segfault below
Expand All @@ -421,17 +420,6 @@ def array_to_timedelta64(
if errors not in {"ignore", "raise", "coerce"}:
raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")

if unit is not None and errors != "coerce":
it = cnp.PyArray_IterNew(values)
for i in range(n):
# Analogous to: item = values[i]
item = cnp.PyArray_GETITEM(values, cnp.PyArray_ITER_DATA(it))
if isinstance(item, str):
raise ValueError(
"unit must not be specified if the input contains a str"
)
cnp.PyArray_ITER_NEXT(it)

# Usually, we have all strings. If so, we hit the fast path.
# If this path fails, we try conversion a different way, and
# this is where all of the error handling will take place.
Expand Down Expand Up @@ -1847,8 +1835,12 @@ class Timedelta(_Timedelta):

from pandas._libs.tslibs.offsets import to_offset

to_offset(freq).nanos # raises on non-fixed freq
unit = delta_to_nanoseconds(to_offset(freq), self._creso)
freq = to_offset(freq)
freq.nanos # raises on non-fixed freq
unit = delta_to_nanoseconds(freq, self._creso)
if unit == 0 and freq.nanos != 0:
# e.g. we have unit="s" and freq="ms"
return self

arr = np.array([self._value], dtype="i8")
try:
Expand Down
15 changes: 13 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,13 @@ def _validate_listlike(self, value, allow_object: bool = False):
# TODO: do we need equal dtype or just comparable?
value = value._internal_get_values()
value = extract_array(value, extract_numpy=True)
elif self.dtype.kind == "m" and value.categories.dtype.kind == "m":
# e.g. Categorical[timedelta64[ns]] and we are timedelta64[s]
value = value._internal_get_values()
value = extract_array(value, extract_numpy=True)
value = value.as_unit(self.unit)
# TODO: for e.g. searchsorted should we be able to do this
# without cast?

if allow_object and is_object_dtype(value.dtype):
pass
Expand Down Expand Up @@ -1981,8 +1988,12 @@ def _round(self, freq, mode, ambiguous, nonexistent):

values = self.view("i8")
values = cast(np.ndarray, values)
nanos = to_offset(freq).nanos # raises on non-fixed frequencies
nanos = delta_to_nanoseconds(to_offset(freq), self._creso)
freq = to_offset(freq)
freq.nanos # raises on non-fixed frequencies
nanos = delta_to_nanoseconds(freq, self._creso)
if freq.nanos != 0 and nanos == 0:
# e.g. we have unit="s" and freq="ms"
return self.copy()
result_i8 = round_nsint64(values, mode, nanos)
result = self._maybe_mask_results(result_i8, fill_value=iNaT)
result = result.view(self._ndarray.dtype)
Expand Down
16 changes: 13 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ def _simple_new( # type: ignore[override]
def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
if dtype:
dtype = _validate_td64_dtype(dtype)
np.datetime_data(dtype)[0]
else:
pass

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
Expand Down Expand Up @@ -242,6 +245,8 @@ def _from_sequence_not_strict(
dtype = _validate_td64_dtype(dtype)

assert unit not in ["Y", "y", "M"] # caller is responsible for checking
if unit is None and dtype is not None:
unit = np.datetime_data(dtype)[0]

explicit_none = freq is None
freq = freq if freq is not lib.no_default else None
Expand Down Expand Up @@ -991,10 +996,15 @@ def _ints_to_td64ns(data, unit: str = "ns"):
dtype_str = f"timedelta64[{unit}]"
data = data.view(dtype_str)

data = astype_overflowsafe(data, dtype=TD64NS_DTYPE)
data_unit = get_unit_from_dtype(data.dtype)
if not is_supported_unit(data_unit):
new_reso = get_supported_reso(data_unit)
new_unit = npy_unit_to_abbrev(new_reso)
new_dtype = np.dtype(f"m8[{new_unit}]")
data = astype_overflowsafe(data, dtype=new_dtype)

# the astype conversion makes a copy, so we can avoid re-copying later
copy_made = True
# the astype conversion makes a copy, so we can avoid re-copying later
copy_made = True

else:
data = data.view("timedelta64[ns]")
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,12 +502,19 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
else:
arg = np.asarray(arg)

if unit in ["ns", "us", "ms", "s"]:
out_unit = unit
else:
# closest supported unit is seconds
out_unit = "s"

if arg.dtype.kind in "iu":
# Note we can't do "f" here because that could induce unwanted
# rounding GH#14156, GH#20445
arr = arg.astype(f"datetime64[{unit}]", copy=False)
out_dtype = np.dtype(f"M8[{out_unit}]")
try:
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
arr = astype_overflowsafe(arr, out_dtype, copy=False)
except OutOfBoundsDatetime:
if errors == "raise":
raise
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,9 @@ def _try_convert_to_date(self, data):
date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
for date_unit in date_units:
try:
new_data = to_datetime(new_data, errors="raise", unit=date_unit)
converted = to_datetime(new_data, errors="raise", unit=date_unit)
# make sure we are within ns bounds (largely for backward compat)
new_data = Series(converted).dt.as_unit("ns")
except (ValueError, OverflowError, TypeError):
continue
return new_data, True
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_apply_mixed_datetimelike():
expected = DataFrame(
{
"A": date_range("20130101", periods=3),
"B": pd.to_timedelta(np.arange(3), unit="s"),
"B": pd.to_timedelta(np.arange(3), unit="s").astype("m8[ns]"),
}
)
result = expected.apply(lambda x: x, axis=1)
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,10 @@ def test_groupby_quantile_allNA_column(dtype):
def test_groupby_timedelta_quantile():
# GH: 29485
df = DataFrame(
{"value": pd.to_timedelta(np.arange(4), unit="s"), "group": [1, 1, 2, 2]}
{
"value": pd.to_timedelta(np.arange(4), unit="s").astype("m8[ns]"),
"group": [1, 1, 2, 2],
}
)
result = df.groupby("group").quantile(0.99)
expected = DataFrame(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1060,7 +1060,7 @@ def test_value_counts_time_grouper(utc):
result = gb.value_counts()
dates = to_datetime(
["2019-08-06", "2019-08-07", "2019-08-09", "2019-08-10"], utc=utc
)
).as_unit("s")
timestamps = df["Timestamp"].unique()
index = MultiIndex(
levels=[dates, timestamps, ["apple", "banana", "orange", "pear"]],
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/indexes/timedeltas/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,19 @@ def test_constructor(self):

expected = TimedeltaIndex(
["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"]
)
tm.assert_index_equal(TimedeltaIndex(range(3), unit="s"), expected)
).astype("m8[s]")
result = TimedeltaIndex(range(3), unit="s")
tm.assert_index_equal(result, expected)
expected = TimedeltaIndex(
["0 days 00:00:00", "0 days 00:00:05", "0 days 00:00:09"]
)
tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit="s"), expected)
).astype("m8[s]")
result = TimedeltaIndex([0, 5, 9], unit="s")
tm.assert_index_equal(result, expected)
expected = TimedeltaIndex(
["0 days 00:00:00.400", "0 days 00:00:00.450", "0 days 00:00:01.200"]
)
tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit="ms"), expected)
).astype("m8[ms]")
result = TimedeltaIndex([400, 450, 1200], unit="ms")
tm.assert_index_equal(result, expected)

def test_constructor_iso(self):
# GH #21877
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/timedeltas/test_scalar_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_round(self):
t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us")
t2 = -1 * t1
t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s")
t1c = TimedeltaIndex([1, 1, 1], unit="D")
t1c = TimedeltaIndex([1, 1, 1], unit="D").astype("m8[ns]")

# note that negative times round DOWN! so don't give whole numbers
for freq, s1, s2 in [
Expand All @@ -122,7 +122,7 @@ def test_round(self):
),
("12T", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])),
("H", t1c, TimedeltaIndex(["-1 days", "-1 days", "-1 days"])),
("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")),
("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D").astype("m8[ns]")),
]:
r1 = t1.round(freq)
tm.assert_index_equal(r1, s1)
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/indexes/timedeltas/test_timedelta_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,23 @@ def test_timedelta_range_unit(self):
def test_timedelta_range(self):
expected = to_timedelta(np.arange(5), unit="D")
result = timedelta_range("0 days", periods=5, freq="D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta(np.arange(11), unit="D")
result = timedelta_range("0 days", "10 days", freq="D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day()
result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2)
result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D")
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

expected = to_timedelta(np.arange(50), unit="T") * 30
result = timedelta_range("0 days", freq="30T", periods=50)
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]"))

@pytest.mark.parametrize(
"periods, freq", [(3, "2D"), (5, "D"), (6, "19H12T"), (7, "16H"), (9, "12H")]
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,9 @@ def test_loc_modify_datetime(self):
)

columns = ["date_dt", "date_dt_cp"]
expected[columns] = expected[columns].apply(to_datetime)
expected[columns] = expected[columns].apply(
lambda x: to_datetime(x).dt.as_unit("ms")
)

tm.assert_frame_equal(df, expected)

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,7 +974,8 @@ def test_timedelta(self):

frame = DataFrame([timedelta(23), timedelta(seconds=5)])
assert frame[0].dtype == "timedelta64[ns]"
tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter))
result = read_json(frame.to_json()).apply(converter)
tm.assert_frame_equal(frame.astype("m8[ms]"), result)

def test_timedelta2(self):
frame = DataFrame(
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __custom_date_parser(time):
)
time = [41047, 41048, 41049, 41050, 41051]
time = pd.TimedeltaIndex([pd.to_timedelta(i, unit="s") for i in time], name="time")
time = time.astype("m8[s]")
expected = DataFrame(
{
"e": [-98573.7297, -98573.7299, -98573.7300, -98573.7299, -98573.7302],
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/reductions/test_stat_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ def test_period_mean(self, box, freq):

@pytest.mark.parametrize("box", [Series, pd.Index, TimedeltaArray])
def test_td64_mean(self, box):
tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D")
tdi = pd.TimedeltaIndex(
[0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D"
).as_unit("ns")

tdarr = tdi._data
obj = box(tdarr, copy=False)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_resample_categorical_data_with_timedeltaindex():
result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))
expected = DataFrame(
{"Group_obj": ["A", "A"], "Group": ["A", "A"]},
index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s"),
index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s").astype("m8[ns]"),
)
expected = expected.reindex(["Group_obj", "Group"], axis=1)
expected["Group"] = expected["Group_obj"]
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,13 @@ def test_timedelta_pass_td_and_kwargs_raises():
[
(Timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
(to_timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
(to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must not be specified")),
pytest.param(
to_timedelta,
["1", 2, 3],
"s",
(ValueError, "unit must not be specified"),
marks=pytest.mark.xfail(reason="Reconsidering API"),
),
],
)
def test_string_with_unit(constructor, value, unit, expectation):
Expand Down
19 changes: 14 additions & 5 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,21 +568,30 @@ def test_nat_converters(self):
def test_unit_parser(self, unit, np_unit, wrapper):
# validate all units, GH 6855, GH 21762
# array-likes
if np_unit in ["s", "ms", "us", "ns"]:
# Supported unit, we retain
pd_unit = np_unit
else:
# closest supported unit
pd_unit = "s"
expected = TimedeltaIndex(
[np.timedelta64(i, np_unit) for i in np.arange(5).tolist()],
dtype="m8[ns]",
dtype=f"m8[{pd_unit}]",
)
# TODO(2.0): the desired output dtype may have non-nano resolution
result = to_timedelta(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)
if wrapper is list:
# TODO: should not depend on this -> need inference in array_to_timedelta64
tm.assert_index_equal(result, expected.astype("m8[ns]"))
else:
tm.assert_index_equal(result, expected)
result = TimedeltaIndex(wrapper(range(5)), unit=unit)
tm.assert_index_equal(result, expected)

str_repr = [f"{x}{unit}" for x in np.arange(5)]
result = to_timedelta(wrapper(str_repr))
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]", copy=False))
result = to_timedelta(wrapper(str_repr))
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result, expected.astype("m8[ns]", copy=False))

# scalar
expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]"))
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1930,6 +1930,8 @@ def test_to_datetime_unit(self, dtype):
expected = Series(
[Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
)
if dtype is int:
expected = expected.dt.as_unit("s")
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("null", [iNaT, np.nan])
Expand All @@ -1941,6 +1943,8 @@ def test_to_datetime_unit_with_nulls(self, null):
[Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
+ [NaT]
)
if null is iNaT:
expected = expected.dt.as_unit("s")
tm.assert_series_equal(result, expected)

def test_to_datetime_unit_fractional_seconds(self):
Expand Down Expand Up @@ -3249,8 +3253,10 @@ def test_invalid_origin(self, unit):
to_datetime("2005-01-01", origin="1960-01-01", unit=unit)

def test_epoch(self, units, epochs, epoch_1960, units_from_epochs):
exp_unit = units if units in ["ns", "us", "ms", "s"] else "s"
expected = Series(
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs],
dtype=f"M8[{exp_unit}]",
)

result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs))
Expand Down
Loading