Skip to content

BUG: Series([ints], dtype=td[non-nano]) not respecting unit #52463

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Fixed regressions

Bug fixes
~~~~~~~~~
-
- Bug in the :class:`Series` constructor where passing a ``timedelta64`` dtype with a non-nanosecond unit would not respect the unit (:issue:`48312`, :issue:`52457`)

.. ---------------------------------------------------------------------------
.. _whatsnew_201.other:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,12 @@ def _simple_new( # type: ignore[override]

@classmethod
def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
unit = None
if dtype:
dtype = _validate_td64_dtype(dtype)
unit = np.datetime_data(dtype)[0]

data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
freq = cast("Tick | None", freq)

Expand Down
35 changes: 8 additions & 27 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,15 @@


class TestSeriesConstructors:
def test_from_ints_with_non_nano_dt64_dtype(self, index_or_series):
@pytest.mark.parametrize("dtype", ["m8", "m8"])
def test_from_ints_with_non_nano_dt64_dtype(self, index_or_series, dtype):
values = np.arange(10)

res = index_or_series(values, dtype="M8[s]")
expected = index_or_series(values.astype("M8[s]"))
res = index_or_series(values, dtype=f"{dtype}[s]")
expected = index_or_series(values.astype(f"{dtype}[s]"))
tm.assert_equal(res, expected)

res = index_or_series(list(values), dtype="M8[s]")
res = index_or_series(list(values), dtype=f"{dtype}[s]")
tm.assert_equal(res, expected)

def test_from_na_value_and_interval_of_datetime_dtype(self):
Expand Down Expand Up @@ -1525,12 +1526,7 @@ def test_constructor_dtype_timedelta64(self):
td.astype("int32")

# this is an invalid casting
msg = "|".join(
[
"Could not convert object to NumPy timedelta",
"Could not convert 'foo' to NumPy timedelta",
]
)
msg = "unit must not be specified if the input contains a str"
with pytest.raises(ValueError, match=msg):
Series([timedelta(days=1), "foo"], dtype="m8[ns]")

Expand Down Expand Up @@ -1958,21 +1954,6 @@ def test_constructor_dtype_timedelta_ns_s(self):
expected = Series([1000000, 200000, 3000000], dtype="timedelta64[s]")
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(
reason="Not clear what the correct expected behavior should be with "
"integers now that we support non-nano. ATM (2022-10-08) we treat ints "
"as nanoseconds, then cast to the requested dtype. xref #48312"
)
def test_constructor_dtype_timedelta_ns_s_astype_int64(self):
# GH#35465
result = Series([1000000, 200000, 3000000], dtype="timedelta64[ns]").astype(
"int64"
)
expected = Series([1000000, 200000, 3000000], dtype="timedelta64[s]").astype(
"int64"
)
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:elementwise comparison failed:DeprecationWarning"
)
Expand Down Expand Up @@ -2096,15 +2077,15 @@ def test_constructor_no_pandas_array(self, using_array_manager):

@td.skip_array_manager_invalid_test
def test_from_array(self):
result = Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]"))
result = Series(pd.array([1, 2], dtype="timedelta64[ns]"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do these need to change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This goes through array_to_timedelta64, which does not seem to handle string arguments when a unit is specified. I suppose this makes sense, since "1H" conflicts in unit with the ns in the dtype.

assert result._mgr.blocks[0].is_extension is False

result = Series(pd.array(["2015"], dtype="datetime64[ns]"))
assert result._mgr.blocks[0].is_extension is False

@td.skip_array_manager_invalid_test
def test_from_list_dtype(self):
result = Series(["1H", "2H"], dtype="timedelta64[ns]")
result = Series([1, 2], dtype="timedelta64[ns]")
assert result._mgr.blocks[0].is_extension is False

result = Series(["2015"], dtype="datetime64[ns]")
Expand Down