Skip to content

ENH: Align Timedelta fractional Seconds (+ replace) #58928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Closed
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ cpdef int64_t delta_to_nanoseconds(
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*
) except? -1
cdef convert_to_timedelta64(object ts, str unit)
# Build a Timedelta from individual day/time components (implemented in
# timedeltas.pyx). ``=*`` marks an optional argument whose default value is
# given by the implementation.
cdef create_timedelta_from_parts(int64_t days=*, int64_t hours=*, int64_t minutes=*, int64_t seconds=*,
    int64_t milliseconds=*, int64_t microseconds=*, int64_t nanoseconds=*)
cdef bint is_any_td_scalar(object obj)


Expand Down
11 changes: 11 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,17 @@ class Timedelta(timedelta):
# Read-only integer property.
@property
def microseconds(self) -> int: ...
# Duration expressed in seconds, returned as a float.
def total_seconds(self) -> float: ...
# Keep the parameter names and their order in sync with ``_Timedelta.replace``
# in timedeltas.pyx. That implementation has no ``weeks`` parameter, so the
# stub must not advertise one.
def replace(
    self,
    days: int | None = None,
    hours: int | None = None,
    minutes: int | None = None,
    seconds: int | None = None,
    milliseconds: int | None = None,
    microseconds: int | None = None,
    nanoseconds: int | None = None,
) -> Timedelta: ...
# Conversion to the ``timedelta`` base type this class derives from
# (presumably datetime.timedelta -- confirm against the stub's imports).
def to_pytimedelta(self) -> timedelta: ...
# Conversion to a NumPy ``np.timedelta64`` scalar.
def to_timedelta64(self) -> np.timedelta64: ...
@property
Expand Down
120 changes: 120 additions & 0 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,25 @@ cdef convert_to_timedelta64(object ts, str unit):
raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}")
return ts.astype("timedelta64[ns]")

cdef create_timedelta_from_parts(
    int64_t days=0, int64_t hours=0, int64_t minutes=0, int64_t seconds=0,
    int64_t milliseconds=0, int64_t microseconds=0, int64_t nanoseconds=0):
    """
    Construct a nanosecond-resolution Timedelta from its individual parts.

    Every argument defaults to 0. Each part is scaled to nanoseconds and the
    scaled parts are summed, so the arguments are not range-checked: any
    integer is accepted for any part and simply contributes to the total.

    Returns
    -------
    The object returned by ``_timedelta_from_value_and_reso`` for the
    ``Timedelta`` class, the summed nanosecond value and ``NPY_FR_ns``.
    """

    # Get total Timedelta time in nanoseconds
    # NOTE(review): every operand is int64_t, so this sum is presumably
    # evaluated with C 64-bit arithmetic; very large inputs (e.g. a huge
    # ``days``) may overflow silently -- confirm and add a bounds check
    # if so.
    total_nanoseconds = (
        days * 24 * 3600 * 1_000_000_000 +
        hours * 3600 * 1_000_000_000 +
        minutes * 60 * 1_000_000_000 +
        seconds * 1_000_000_000 +
        milliseconds * 1_000_000 +
        microseconds * 1_000 +
        nanoseconds
    )

    return _timedelta_from_value_and_reso(Timedelta, total_nanoseconds, NPY_FR_ns)

cdef _maybe_cast_from_unit(ts, str unit):
# caller is responsible for checking
Expand Down Expand Up @@ -1189,6 +1208,107 @@ cdef class _Timedelta(timedelta):
# TODO: add nanos/1e9?
return self.days * 24 * 3600 + self.seconds + self.microseconds / 1_000_000

def replace(
    self,
    days: int = None,
    hours: int = None,
    minutes: int = None,
    seconds: int = None,
    milliseconds: int = None,
    microseconds: int = None,
    nanoseconds: int = None,
):
    """
    Return a Timedelta with the given components replaced.

    Components that are not supplied (left as ``None``) keep the value they
    have on this Timedelta.

    Parameters
    ----------
    days : int, optional
        New value for the days component.
    hours : int, optional
        New value for the hours component.
    minutes : int, optional
        New value for the minutes component.
    seconds : int, optional
        New value for the seconds component.
    milliseconds : int, optional
        New value for the milliseconds component.
    microseconds : int, optional
        New value for the microseconds component.
    nanoseconds : int, optional
        New value for the nanoseconds component.

    Returns
    -------
    Timedelta
        New Timedelta with specified fields replaced.

    Raises
    ------
    TypeError
        If a supplied component is not an integer.

    Notes
    -----
    Supplied values are not range-checked. The components are scaled to
    nanoseconds and summed, so a value such as ``hours=30`` carries over
    into the days of the result.

    Examples
    --------
    >>> td = pd.Timedelta(days=1, hours=5, minutes=45)
    >>> td.replace(hours=10)
    Timedelta('1 days 10:45:00')
    """
    def pick(name, value, current):
        """Return ``value`` if supplied and integral, else ``current``."""
        # None means "not supplied": keep the component already on self.
        if value is None:
            return current
        # An argument of the wrong type is a TypeError, not a ValueError.
        if not is_integer_object(value):
            raise TypeError(
                f"value must be an integer, received {type(value)} for {name}"
            )
        return value

    # Populate the cached _d/_h/_m/_s/_ms/_us/_ns component fields.
    self._ensure_components()

    # Arguments are evaluated in declaration order, so the first invalid
    # component (days first, nanoseconds last) is the one reported.
    return create_timedelta_from_parts(
        days=pick("days", days, self._d),
        hours=pick("hours", hours, self._h),
        minutes=pick("minutes", minutes, self._m),
        seconds=pick("seconds", seconds, self._s),
        milliseconds=pick("milliseconds", milliseconds, self._ms),
        microseconds=pick("microseconds", microseconds, self._us),
        nanoseconds=pick("nanoseconds", nanoseconds, self._ns),
    )

@property
def unit(self) -> str:
"""
Expand Down
56 changes: 55 additions & 1 deletion pandas/core/indexes/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,60 @@ def components(self) -> DataFrame:
def freq(self):
    """Return the ``inferred_freq`` of the values behind this accessor."""
    values = self._get_values()
    return values.inferred_freq

@property
def adjusted(self):
    """
    Return the timedeltas rendered as column-aligned strings.

    Each element is formatted as ``"<days> days HH:MM:SS.fffffffff"``. The
    day count is right-justified to the widest day count among the values,
    and the seconds always carry nine fractional digits (milliseconds,
    microseconds and nanoseconds, three digits each), so every string in
    the result has the same length.

    Returns
    -------
    Series
        One formatted string per element.

    Examples
    --------
    >>> ser = pd.Series(pd.to_timedelta(["2 days 00:04:00", "10 days"]))
    >>> ser.dt.adjusted.tolist()
    [' 2 days 00:04:00.000000000', '10 days 00:00:00.000000000']
    """
    import pandas as pd

    # Read the components once per element; they are needed both to find
    # the day-column width and to build the strings.
    # NOTE(review): missing values (NaT) get no special handling here --
    # confirm how they should be rendered.
    parts = [td.components for td in self._get_values()]

    # default=0 keeps empty input working (it yields an empty Series).
    days_width = max((len(str(c.days)) for c in parts), default=0)

    # The "SS.mmmuuunnn" tail is fixed-width by construction (2 + 1 + 9
    # characters), so only the day count needs padding to line up.
    formatted = [
        f"{c.days:>{days_width}} days "
        f"{c.hours:02d}:{c.minutes:02d}:"
        f"{c.seconds:02d}."
        f"{c.milliseconds:03d}{c.microseconds:03d}{c.nanoseconds:03d}"
        for c in parts
    ]

    # NOTE(review): the result is built without an explicit index or name,
    # so it does not carry over those of the accessed Series -- confirm
    # whether it should.
    return pd.Series(formatted)

@delegate_names(
delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property"
Expand Down Expand Up @@ -642,4 +696,4 @@ def __new__(cls, data: Series): # pyright: ignore[reportInconsistentConstructor
elif isinstance(data.dtype, PeriodDtype):
return PeriodProperties(data, orig)

raise AttributeError("Can only use .dt accessor with datetimelike values")
raise AttributeError("Can only use .dt accessor with datetimelike values")
47 changes: 47 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,53 @@ def test_total_seconds_nanoseconds(self):
result = (end_time - start_time).dt.total_seconds().values
assert result == expected

def test_adjusted_timedelta(self):
    """Check ``Series.dt.adjusted`` output for several timedelta inputs."""
    # Each scenario pairs the timedelta values with the aligned strings
    # that ``.dt.adjusted`` is expected to produce for them.
    scenarios = [
        (
            pd.timedelta_range("1 day", periods=3),
            [
                "1 days 00:00:00.000000000",
                "2 days 00:00:00.000000000",
                "3 days 00:00:00.000000000",
            ],
        ),
        (
            pd.timedelta_range("1 day 01:23:45", periods=3, freq="s"),
            [
                "1 days 01:23:45.000000000",
                "1 days 01:23:46.000000000",
                "1 days 01:23:47.000000000",
            ],
        ),
        (
            pd.timedelta_range("2 days 01:23:45.012345", periods=3, freq="ms"),
            [
                "2 days 01:23:45.012345000",
                "2 days 01:23:45.013345000",
                "2 days 01:23:45.014345000",
            ],
        ),
        (
            [
                pd.Timedelta("2 days 4 min 3 us 42 ns"),
                pd.Timedelta("1 days 23 hours 59 min 59 sec 999 ms 999 us 999 ns"),
                pd.Timedelta("10 days"),
            ],
            [
                " 2 days 00:04:00.000003042",
                " 1 days 23:59:59.999999999",
                "10 days 00:00:00.000000000",
            ],
        ),
    ]

    for values, rendered in scenarios:
        ser = pd.Series(values, name="xxx")
        actual = ser.dt.adjusted
        # The name is set on the result before comparing, so only the
        # values and the index are effectively under test.
        actual.name = "xxx"
        tm.assert_series_equal(actual, pd.Series(rendered, name="xxx"))

def test_adjusted_single_timedelta(self):
    """Check ``.dt.adjusted`` on an unnamed Series of Timedelta scalars."""
    raw = [
        "2 days 4 min 3 us 42 ns",
        "1 days 23 hours 59 min 59 sec 999 ms 999 us 999 ns",
        "10 days",
    ]
    tda = pd.Series([pd.Timedelta(text) for text in raw])

    # Day counts are right-aligned to the widest value ("10").
    expected = pd.Series(
        [
            " 2 days 00:04:00.000003042",
            " 1 days 23:59:59.999999999",
            "10 days 00:00:00.000000000",
        ]
    )

    tm.assert_series_equal(tda.dt.adjusted, expected)

@pytest.mark.parametrize(
"nat", [np.datetime64("NaT", "ns"), np.datetime64("NaT", "us")]
)
Expand Down
75 changes: 75 additions & 0 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,81 @@ def test_resolution_deprecated(self):
result = Timedelta.resolution
assert result == Timedelta(nanoseconds=1)

def test_replace_timedelta(self):
    """Check ``Timedelta.replace`` for valid overrides and invalid types."""
    td = Timedelta(days=1, hours=5, minutes=45)
    td_with_nanos = Timedelta(days=1, hours=5, minutes=45, nanoseconds=500)

    # (starting value, fields to replace, expected result)
    checks = [
        # replacing hours
        (td, {"hours": 10}, Timedelta(days=1, hours=10, minutes=45)),
        # replacing days and hours
        (td, {"days": 2, "hours": 3}, Timedelta(days=2, hours=3, minutes=45)),
        # replacing minutes
        (td, {"minutes": 30}, Timedelta(days=1, hours=5, minutes=30)),
        # replacing multiple fields
        (
            td,
            {"hours": 8, "minutes": 15, "seconds": 30},
            Timedelta(days=1, hours=8, minutes=15, seconds=30),
        ),
        # replacing nanoseconds
        (
            td_with_nanos,
            {"nanoseconds": 1000},
            Timedelta(days=1, hours=5, minutes=45, nanoseconds=1000),
        ),
        # replacing everything with zero
        (
            td,
            {
                "days": 0,
                "hours": 0,
                "minutes": 0,
                "seconds": 0,
                "milliseconds": 0,
                "microseconds": 0,
                "nanoseconds": 0,
            },
            Timedelta(0),
        ),
    ]
    for base, overrides, expected in checks:
        replaced = base.replace(**overrides)
        assert replaced == expected, f"Expected {expected}, but got {replaced}"

    # Non-integer values must be rejected.
    for bad in ({"hours": "10"}, {"days": 2.5}):
        with pytest.raises(TypeError):
            td.replace(**bad)

def test_replace_single_field(self):
    """Replacing one field must leave every other component untouched."""
    fields = {
        "days": 1,
        "hours": 5,
        "minutes": 45,
        "seconds": 30,
        "milliseconds": 250,
        "microseconds": 125,
        "nanoseconds": 60,
    }
    td = Timedelta(**fields)

    # One (field, new value) pair per check, each applied to the same ``td``.
    single_changes = [
        ("days", 2),
        ("seconds", 45),
        ("milliseconds", 500),
        ("microseconds", 300),
        ("nanoseconds", 500),
    ]
    for name, new_value in single_changes:
        replaced = td.replace(**{name: new_value})
        # Expected value: the original fields with only ``name`` swapped.
        expected = Timedelta(**{**fields, name: new_value})
        assert replaced == expected, f"Expected {expected}, but got {replaced}"

@pytest.mark.parametrize(
"value, expected",
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/series/accessors/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ def test_dt_namespace_accessor_timedelta(self):
assert isinstance(result, Series)
assert result.dtype == "float64"

result = ser.dt.adjusted
assert isinstance(result, Series)
assert result.dtype == object

freq_result = ser.dt.freq
assert freq_result == TimedeltaIndex(ser.values, freq="infer").freq

Expand Down
Loading