From c965b9af6537f30611245db4ec95d8c1697abfae Mon Sep 17 00:00:00 2001 From: Sofia Simas Date: Thu, 23 May 2024 18:43:19 +0100 Subject: [PATCH 01/11] feat: initial implementation of replace --- pandas/_libs/tslibs/timedeltas.pxd | 2 + pandas/_libs/tslibs/timedeltas.pyi | 13 ++++- pandas/_libs/tslibs/timedeltas.pyx | 94 +++++++++++++++++++++++++++++- pandas/core/arrays/timedeltas.py | 2 + 4 files changed, 109 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index f3473e46b6699..9237435fd7529 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -10,6 +10,8 @@ cpdef int64_t delta_to_nanoseconds( delta, NPY_DATETIMEUNIT reso=*, bint round_ok=* ) except? -1 cdef convert_to_timedelta64(object ts, str unit) +cdef create_timedelta_from_parts(int days=*, int hours=*, int minutes=*, int seconds=*, + int milliseconds=*, int microseconds=*, int nanoseconds=*) cdef bint is_any_td_scalar(object obj) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 24ec6c8891a89..44e014b6ba7b4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -108,13 +108,24 @@ class Timedelta(timedelta): ) -> _S | NaTType: ... @classmethod def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ... + def replace( + self, + days: int | None = None, + seconds: int | None = None, + microseconds: int | None = None, + milliseconds: int | None = None, + minutes: int | None = None, + hours: int | None = None, + weeks: int | None = None, + nanoseconds: int | None = None, + ) -> Timedelta: ... @property def days(self) -> int: ... @property def seconds(self) -> int: ... @property def microseconds(self) -> int: ... - def total_seconds(self) -> float: ... + def total_seconds(self) -> float: ... def to_pytimedelta(self) -> timedelta: ... def to_timedelta64(self) -> np.timedelta64: ... @property diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9078fd4116899..367c0756b6072 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -151,7 +151,6 @@ _no_input = object() # ---------------------------------------------------------------------- # API - @cython.boundscheck(False) @cython.wraparound(False) def ints_to_pytimedelta(ndarray m8values, box=False): @@ -370,6 +369,25 @@ cdef convert_to_timedelta64(object ts, str unit): return ts.astype("timedelta64[ns]") +cdef create_timedelta_from_parts( + int days=0, int hours=0, int minutes=0, int seconds=0, + int milliseconds=0, int microseconds=0, int nanoseconds=0): + """ + Convenience routine to construct a Timedelta from its parts + """ + cdef int64_t total_nanoseconds = ( + days * 24 * 3600 * 1_000_000_000 + + hours * 3600 * 1_000_000_000 + + minutes * 60 * 1_000_000_000 + + seconds * 1_000_000_000 + + milliseconds * 1_000_000 + + microseconds * 1_000 + + nanoseconds + ) + + return _timedelta_from_value_and_reso(Timedelta, total_nanoseconds, NPY_FR_ns) + + cdef _maybe_cast_from_unit(ts, str unit): # caller is responsible for checking # assert unit not in ["Y", "y", "M"] @@ -2083,6 +2101,80 @@ class Timedelta(_Timedelta): """ return self._round(freq, RoundTo.PLUS_INFTY) + def replace( + self, + days: int = None, + hours: int = None, + minutes: int = None, + seconds: int = None, + milliseconds: int = None, + microseconds: int = None, + nanoseconds: int = None, + ): + """ + Return a Timedelta with new specified fields replacing the corresponding + fields of the current Timedelta. + + Parameters + ---------- + days : int, optional + hours : int, optional + minutes : int, optional + seconds : int, optional + milliseconds : int, optional + microseconds : int, optional + nanoseconds : int, optional + + Returns + ------- + Timedelta + New Timedelta with specified fields replaced. + + Examples + -------- + >>> td = pd.Timedelta(days=1, hours=5, minutes=45) + >>> td.replace(hours=10) + Timedelta('1 days 10:45:00') + """ + # Validate integer inputs + def validate(k, v): + """ validate integers """ + print(k) + print("\nENTROU\n") + if not is_integer_object(v): + raise ValueError( + f"value must be an integer, received {type(v)} for {k}" + ) + return v + + current_days = self._d + current_hours = self._h + current_minutes = self._m + current_seconds = self._s + current_milliseconds = self._ms + current_microseconds = self._us + current_nanoseconds = self._ns + + # Replace specified components, keep existing values for unspecified components + days = validate("days", days) if days is not None else current_days + hours = validate("hours", hours) if hours is not None else current_hours + minutes = validate("minutes", minutes) if minutes is not None else current_minutes + seconds = validate("seconds", seconds) if seconds is not None else current_seconds + milliseconds = validate("milliseconds", milliseconds) if milliseconds is not None else current_milliseconds + microseconds = validate("microseconds", microseconds) if microseconds is not None else current_microseconds + nanoseconds = validate("nanoseconds", nanoseconds) if nanoseconds is not None else current_nanoseconds + + # Create new Timedelta from parts + return create_timedelta_from_parts( + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + milliseconds=milliseconds, + microseconds=microseconds, + nanoseconds=nanoseconds + ) + # ---------------------------------------------------------------- # Arithmetic Methods # TODO: Can some of these be defined in the cython class? diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index c41e078095feb..ce5b4d9c02814 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -776,6 +776,8 @@ def total_seconds(self) -> npt.NDArray[np.float64]: """ pps = periods_per_second(self._creso) return self._maybe_mask_results(self.asi8 / pps, fill_value=None) + + def to_pytimedelta(self) -> npt.NDArray[np.object_]: """ From 06245fc53b3134b50ed510df180ffec22bc86666 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sat, 25 May 2024 14:35:05 +0000 Subject: [PATCH 02/11] fix: changed replace to use int64 --- pandas/_libs/tslibs/timedeltas.pxd | 4 ++-- pandas/_libs/tslibs/timedeltas.pyi | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 13 +++++++------ pandas/core/arrays/timedeltas.py | 2 -- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 9237435fd7529..a5ed8d2e814a9 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -10,8 +10,8 @@ cpdef int64_t delta_to_nanoseconds( delta, NPY_DATETIMEUNIT reso=*, bint round_ok=* ) except? -1 cdef convert_to_timedelta64(object ts, str unit) -cdef create_timedelta_from_parts(int days=*, int hours=*, int minutes=*, int seconds=*, - int milliseconds=*, int microseconds=*, int nanoseconds=*) +cdef create_timedelta_from_parts(int64_t days=*, int64_t hours=*, int64_t minutes=*, int64_t seconds=*, + int64_t milliseconds=*, int64_t microseconds=*, int64_t nanoseconds=*) cdef bint is_any_td_scalar(object obj) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 44e014b6ba7b4..553e5266d07d7 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -125,7 +125,7 @@ class Timedelta(timedelta): def seconds(self) -> int: ... @property def microseconds(self) -> int: ... - def total_seconds(self) -> float: ... + def total_seconds(self) -> float: ... def to_pytimedelta(self) -> timedelta: ... def to_timedelta64(self) -> np.timedelta64: ... @property diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 367c0756b6072..a4c4d2021903f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -151,6 +151,7 @@ _no_input = object() # ---------------------------------------------------------------------- # API + @cython.boundscheck(False) @cython.wraparound(False) def ints_to_pytimedelta(ndarray m8values, box=False): @@ -370,12 +371,13 @@ cdef convert_to_timedelta64(object ts, str unit): cdef create_timedelta_from_parts( - int days=0, int hours=0, int minutes=0, int seconds=0, - int milliseconds=0, int microseconds=0, int nanoseconds=0): + int64_t days=0, int64_t hours=0, int64_t minutes=0, int64_t seconds=0, + int64_t milliseconds=0, int64_t microseconds=0, int64_t nanoseconds=0): """ Convenience routine to construct a Timedelta from its parts """ - cdef int64_t total_nanoseconds = ( + + total_nanoseconds = ( days * 24 * 3600 * 1_000_000_000 + hours * 3600 * 1_000_000_000 + minutes * 60 * 1_000_000_000 + @@ -384,7 +386,7 @@ cdef create_timedelta_from_parts( microseconds * 1_000 + nanoseconds ) - + return _timedelta_from_value_and_reso(Timedelta, total_nanoseconds, NPY_FR_ns) @@ -979,6 +981,7 @@ cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): _Timedelta td_base assert value != NPY_NAT + # For millisecond and second resos, we cannot actually pass int(value) because # many cases would fall outside of the pytimedelta implementation bounds. # We pass 0 instead, and override seconds, microseconds, days. @@ -2139,8 +2142,6 @@ class Timedelta(_Timedelta): # Validate integer inputs def validate(k, v): """ validate integers """ - print(k) - print("\nENTROU\n") if not is_integer_object(v): raise ValueError( f"value must be an integer, received {type(v)} for {k}" diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index ce5b4d9c02814..49501d7aadc01 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -777,8 +777,6 @@ def total_seconds(self) -> npt.NDArray[np.float64]: pps = periods_per_second(self._creso) return self._maybe_mask_results(self.asi8 / pps, fill_value=None) - - def to_pytimedelta(self) -> npt.NDArray[np.object_]: """ Return an ndarray of datetime.timedelta objects. From 9350dd7fb9650bc3c2dcc60bfbb54468c9ff3ac8 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sat, 25 May 2024 18:33:08 +0000 Subject: [PATCH 03/11] feat: implement adjusted for timedeltas --- pandas/core/indexes/accessors.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 2bb234e174563..e176c595b3993 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -12,6 +12,8 @@ import numpy as np +import pandas as pd + from pandas._libs import lib from pandas.core.dtypes.common import ( @@ -500,7 +502,35 @@ def components(self) -> DataFrame: @property def freq(self): return self._get_values().inferred_freq + + import pandas as pd + + import pandas as pd + @property + def adjusted(self): + + max_days_length = 1 + max_nanoseconds_lenght = 2 + + for td in self._get_values(): + aux = len(str(td.components.days)) + if(aux > max_days_length): + max_days_length = aux + + aux = td.components.seconds*1000000000 + td.components.milliseconds * 1000000 + td.components.microseconds * 1000 + td.components.nanoseconds + if(len(str(aux)) > max_nanoseconds_lenght): + max_nanoseconds_lenght = aux + + formatted_td = [] + for td in self._get_values(): + days = td.components.days + hours = td.components.hours + minutes = td.components.minutes + seconds = td.components.seconds*1000000000 + td.components.milliseconds * 1000000 + td.components.microseconds * 1000 + td.components.nanoseconds + formatted_td.append(f"{days:>{max_days_length}} days {hours:02d}:{minutes:02d}:{seconds:09d}") + + return pd.Series(formatted_td) @delegate_names( delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property" @@ -642,4 +672,4 @@ def __new__(cls, data: Series): # pyright: ignore[reportInconsistentConstructor elif isinstance(data.dtype, PeriodDtype): return PeriodProperties(data, orig) - raise AttributeError("Can only use .dt accessor with datetimelike values") + raise AttributeError("Can only use .dt accessor with datetimelike values") \ No newline at end of file From f45c0aa76054f4e77c2d061d31ba734f721c3836 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sun, 26 May 2024 20:16:22 +0000 Subject: [PATCH 04/11] fix: dt.adjusted --- pandas/core/indexes/accessors.py | 45 +++++++++++++++++++------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index e176c595b3993..0205a4c111b5f 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -12,8 +12,6 @@ import numpy as np -import pandas as pd - from pandas._libs import lib from pandas.core.dtypes.common import ( @@ -502,36 +500,47 @@ def components(self) -> DataFrame: @property def freq(self): return self._get_values().inferred_freq - - import pandas as pd - - import pandas as pd @property def adjusted(self): + import pandas as pd - max_days_length = 1 - max_nanoseconds_lenght = 2 + max_days_length = 0 + max_time_length = 0 for td in self._get_values(): - aux = len(str(td.components.days)) - if(aux > max_days_length): - max_days_length = aux - - aux = td.components.seconds*1000000000 + td.components.milliseconds * 1000000 + td.components.microseconds * 1000 + td.components.nanoseconds - if(len(str(aux)) > max_nanoseconds_lenght): - max_nanoseconds_lenght = aux - + days_length = len(str(td.components.days)) + if days_length > max_days_length: + max_days_length = days_length + + total_ns = ( + td.components.seconds * 1000000000 + + td.components.milliseconds * 1000000 + + td.components.microseconds * 1000 + + td.components.nanoseconds + ) + time_str = f"{td.components.seconds:02d}.{td.components.milliseconds:03d}{td.components.microseconds:03d}{td.components.nanoseconds:03d}" + if len(time_str) > max_time_length: + max_time_length = len(time_str) + formatted_td = [] for td in self._get_values(): days = td.components.days hours = td.components.hours minutes = td.components.minutes - seconds = td.components.seconds*1000000000 + td.components.milliseconds * 1000000 + td.components.microseconds * 1000 + td.components.nanoseconds - formatted_td.append(f"{days:>{max_days_length}} days {hours:02d}:{minutes:02d}:{seconds:09d}") + seconds = td.components.seconds + milliseconds = td.components.milliseconds + microseconds = td.components.microseconds + nanoseconds = td.components.nanoseconds + + combined_time_str = f"{seconds:02d}.{milliseconds:03d}{microseconds:03d}{nanoseconds:03d}" + combined_time_str = combined_time_str.ljust(max_time_length, '0') + + formatted_td.append(f"{days:>{max_days_length}} days {hours:02d}:{minutes:02d}:{combined_time_str}") return pd.Series(formatted_td) + @delegate_names( delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property" ) From 25bb8181ca4fab949c08fcdcb3e8407a8a2b5327 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sun, 26 May 2024 21:22:06 +0000 Subject: [PATCH 05/11] fix: replace --- pandas/_libs/tslibs/timedeltas.pyi | 14 +-- pandas/_libs/tslibs/timedeltas.pyx | 147 +++++++++++++++-------------- 2 files changed, 82 insertions(+), 79 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 553e5266d07d7..ec0cf674eeca4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -108,6 +108,13 @@ class Timedelta(timedelta): ) -> _S | NaTType: ... @classmethod def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ... + @property + def days(self) -> int: ... + @property + def seconds(self) -> int: ... + @property + def microseconds(self) -> int: ... + def total_seconds(self) -> float: ... def replace( self, days: int | None = None, @@ -119,13 +126,6 @@ class Timedelta(timedelta): weeks: int | None = None, nanoseconds: int | None = None, ) -> Timedelta: ... - @property - def days(self) -> int: ... - @property - def seconds(self) -> int: ... - @property - def microseconds(self) -> int: ... - def total_seconds(self) -> float: ... def to_pytimedelta(self) -> timedelta: ... def to_timedelta64(self) -> np.timedelta64: ... @property diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a4c4d2021903f..dec72c0d6de0d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1210,6 +1210,81 @@ cdef class _Timedelta(timedelta): # TODO: add nanos/1e9? return self.days * 24 * 3600 + self.seconds + self.microseconds / 1_000_000 + def replace( + self, + days: int = None, + hours: int = None, + minutes: int = None, + seconds: int = None, + milliseconds: int = None, + microseconds: int = None, + nanoseconds: int = None, + ): + """ + Return a Timedelta with new specified fields replacing the corresponding + fields of the current Timedelta. + + Parameters + ---------- + days : int, optional + hours : int, optional + minutes : int, optional + seconds : int, optional + milliseconds : int, optional + microseconds : int, optional + nanoseconds : int, optional + + Returns + ------- + Timedelta + New Timedelta with specified fields replaced. + + Examples + -------- + >>> td = pd.Timedelta(days=1, hours=5, minutes=45) + >>> td.replace(hours=10) + Timedelta('1 days 10:45:00') + """ + # Validate integer inputs + def validate(k, v): + """ validate integers """ + if not is_integer_object(v): + raise ValueError( + f"value must be an integer, received {type(v)} for {k}" + ) + return v + + self._ensure_components() + + current_days = self._d + current_hours = self._h + current_minutes = self._m + current_seconds = self._s + current_milliseconds = self._ms + current_microseconds = self._us + current_nanoseconds = self._ns + + # Replace specified components, keep existing values for unspecified components + days = validate("days", days) if days is not None else current_days + hours = validate("hours", hours) if hours is not None else current_hours + minutes = validate("minutes", minutes) if minutes is not None else current_minutes + seconds = validate("seconds", seconds) if seconds is not None else current_seconds + milliseconds = validate("milliseconds", milliseconds) if milliseconds is not None else current_milliseconds + microseconds = validate("microseconds", microseconds) if microseconds is not None else current_microseconds + nanoseconds = validate("nanoseconds", nanoseconds) if nanoseconds is not None else current_nanoseconds + + # Create new Timedelta from parts + return create_timedelta_from_parts( + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + milliseconds=milliseconds, + microseconds=microseconds, + nanoseconds=nanoseconds + ) + + @property def unit(self) -> str: """ @@ -2104,78 +2179,6 @@ class Timedelta(_Timedelta): """ return self._round(freq, RoundTo.PLUS_INFTY) - def replace( - self, - days: int = None, - hours: int = None, - minutes: int = None, - seconds: int = None, - milliseconds: int = None, - microseconds: int = None, - nanoseconds: int = None, - ): - """ - Return a Timedelta with new specified fields replacing the corresponding - fields of the current Timedelta. - - Parameters - ---------- - days : int, optional - hours : int, optional - minutes : int, optional - seconds : int, optional - milliseconds : int, optional - microseconds : int, optional - nanoseconds : int, optional - - Returns - ------- - Timedelta - New Timedelta with specified fields replaced. - - Examples - -------- - >>> td = pd.Timedelta(days=1, hours=5, minutes=45) - >>> td.replace(hours=10) - Timedelta('1 days 10:45:00') - """ - # Validate integer inputs - def validate(k, v): - """ validate integers """ - if not is_integer_object(v): - raise ValueError( - f"value must be an integer, received {type(v)} for {k}" - ) - return v - - current_days = self._d - current_hours = self._h - current_minutes = self._m - current_seconds = self._s - current_milliseconds = self._ms - current_microseconds = self._us - current_nanoseconds = self._ns - - # Replace specified components, keep existing values for unspecified components - days = validate("days", days) if days is not None else current_days - hours = validate("hours", hours) if hours is not None else current_hours - minutes = validate("minutes", minutes) if minutes is not None else current_minutes - seconds = validate("seconds", seconds) if seconds is not None else current_seconds - milliseconds = validate("milliseconds", milliseconds) if milliseconds is not None else current_milliseconds - microseconds = validate("microseconds", microseconds) if microseconds is not None else current_microseconds - nanoseconds = validate("nanoseconds", nanoseconds) if nanoseconds is not None else current_nanoseconds - - # Create new Timedelta from parts - return create_timedelta_from_parts( - days=days, - hours=hours, - minutes=minutes, - seconds=seconds, - milliseconds=milliseconds, - microseconds=microseconds, - nanoseconds=nanoseconds - ) - # ---------------------------------------------------------------- # Arithmetic Methods # TODO: Can some of these be defined in the cython class? From 39daeb121f3df7f03f7a6d16d6c81601f4374e13 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sun, 26 May 2024 21:22:48 +0000 Subject: [PATCH 06/11] test: implement all tests --- pandas/tests/arrays/test_timedeltas.py | 47 ++++++++++++++ .../tests/scalar/timedelta/test_timedelta.py | 63 +++++++++++++++++++ .../series/accessors/test_dt_accessor.py | 4 ++ 3 files changed, 114 insertions(+) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index bcc52f197ee51..e89fda313c8b1 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -78,6 +78,53 @@ def test_total_seconds_nanoseconds(self): result = (end_time - start_time).dt.total_seconds().values assert result == expected + def test_adjusted_timedelta(self): + cases = [ + pd.Series( + pd.timedelta_range("1 day", periods=3), + name="xxx" + ), + pd.Series( + pd.timedelta_range("1 day 01:23:45", periods=3, freq="s"), + name="xxx" + ), + pd.Series( + pd.timedelta_range("2 days 01:23:45.012345", periods=3, freq="ms"), + name="xxx" + ), + pd.Series( + [pd.Timedelta('2 days 4 min 3 us 42 ns'), + pd.Timedelta('1 days 23 hours 59 min 59 sec 999 ms 999 us 999 ns'), + pd.Timedelta('10 days')], + name="xxx" + ) + ] + + expected_results = [ + ['1 days 00:00:00.000000000', '2 days 00:00:00.000000000', '3 days 00:00:00.000000000'], + ['1 days 01:23:45.000000000', '1 days 01:23:46.000000000', '1 days 01:23:47.000000000'], + ['2 days 01:23:45.012345000', '2 days 01:23:45.013345000', '2 days 01:23:45.014345000'], + [' 2 days 00:04:00.000003042', ' 1 days 23:59:59.999999999', '10 days 00:00:00.000000000'] + ] + + for ser, expected in zip(cases, expected_results): + result_adjusted = ser.dt.adjusted + expected_adjusted = pd.Series(expected, name="xxx") + result_adjusted.name = "xxx" + tm.assert_series_equal(result_adjusted, expected_adjusted) + + def test_adjusted_single_timedelta(self): + tda = pd.Series([pd.Timedelta('2 days 4 min 3 us 42 ns'), + pd.Timedelta('1 days 23 hours 59 min 59 sec 999 ms 999 us 999 ns'), + pd.Timedelta('10 days')]) + + expected = pd.Series([' 2 days 00:04:00.000003042', + ' 1 days 23:59:59.999999999', + '10 days 00:00:00.000000000']) + + result = tda.dt.adjusted + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( "nat", [np.datetime64("NaT", "ns"), np.datetime64("NaT", "us")] ) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 73b2da0f7dd50..3bf5d3dbead87 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -636,6 +636,69 @@ def test_resolution_deprecated(self): result = Timedelta.resolution assert result == Timedelta(nanoseconds=1) + def test_replace_timedelta(self): + td = Timedelta(days=1, hours=5, minutes=45) + + # Test replacing hours + replaced = td.replace(hours=10) + expected = Timedelta(days=1, hours=10, minutes=45) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing days and hours + replaced = td.replace(days=2, hours=3) + expected = Timedelta(days=2, hours=3, minutes=45) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing minutes + replaced = td.replace(minutes=30) + expected = Timedelta(days=1, hours=5, minutes=30) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing multiple fields + replaced = td.replace(hours=8, minutes=15, seconds=30) + expected = Timedelta(days=1, hours=8, minutes=15, seconds=30) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing nanoseconds + td_with_nanos = Timedelta(days=1, hours=5, minutes=45, nanoseconds=500) + replaced = td_with_nanos.replace(nanoseconds=1000) + expected = Timedelta(days=1, hours=5, minutes=45, nanoseconds=1000) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing with zero values + replaced = td.replace(days=0, hours=0, minutes=0, seconds=0, milliseconds=0, microseconds=0, nanoseconds=0) + expected = Timedelta(0) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test invalid inputs + with pytest.raises(TypeError): + td.replace(hours='10') # invalid hours value + + with pytest.raises(TypeError): + td.replace(days=2.5) # invalid days value + + def test_replace_single_field(self): + td = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, microseconds=125, nanoseconds=60) + + # Test replacing only seconds + replaced = td.replace(seconds=45) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=45, milliseconds=250, microseconds=125, nanoseconds=60) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing only milliseconds + replaced = td.replace(milliseconds=500) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=500, microseconds=125, nanoseconds=60) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing only microseconds + replaced = td.replace(microseconds=300) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, microseconds=300, nanoseconds=60) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + + # Test replacing only nanoseconds + replaced = td.replace(nanoseconds=500) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, microseconds=125, nanoseconds=500) + assert replaced == expected, f"Expected {expected}, but got {replaced}" @pytest.mark.parametrize( "value, expected", diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 5f0057ac50b47..a04dd9ebb744d 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -200,6 +200,10 @@ def test_dt_namespace_accessor_timedelta(self): assert isinstance(result, Series) assert result.dtype == "float64" + result = ser.dt.adjusted + assert isinstance(result, Series) + assert result.dtype == object + freq_result = ser.dt.freq assert freq_result == TimedeltaIndex(ser.values, freq="infer").freq From 97d7db7a5d11f99eb3a59adf8f0b0cbb4932f867 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sun, 26 May 2024 21:32:57 +0000 Subject: [PATCH 07/11] fix: formatting --- pandas/_libs/tslibs/timedeltas.pyx | 41 +++++++++++++++---- .../tests/scalar/timedelta/test_timedelta.py | 22 ++++++---- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index dec72c0d6de0d..7aace63e19b78 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1265,13 +1265,40 @@ cdef class _Timedelta(timedelta): current_nanoseconds = self._ns # Replace specified components, keep existing values for unspecified components - days = validate("days", days) if days is not None else current_days - hours = validate("hours", hours) if hours is not None else current_hours - minutes = validate("minutes", minutes) if minutes is not None else current_minutes - seconds = validate("seconds", seconds) if seconds is not None else current_seconds - milliseconds = validate("milliseconds", milliseconds) if milliseconds is not None else current_milliseconds - microseconds = validate("microseconds", microseconds) if microseconds is not None else current_microseconds - nanoseconds = validate("nanoseconds", nanoseconds) if nanoseconds is not None else current_nanoseconds + if days is not None: + days = validate("days", days) + else: + days = current_days + + if hours is not None: + hours = validate("hours", hours) + else: + hours = current_hours + + if minutes is not None: + minutes = validate("minutes", minutes) + else: + minutes = current_minutes + + if seconds is not None: + seconds = validate("seconds", seconds) + else: + seconds = current_seconds + + if milliseconds is not None: + milliseconds = validate("milliseconds", milliseconds) + else: + milliseconds = current_milliseconds + + if microseconds is not None: + microseconds = validate("microseconds", microseconds) + else: + microseconds = current_microseconds + + if nanoseconds is not None: + nanoseconds = validate("nanoseconds", nanoseconds) + else: + nanoseconds = current_nanoseconds # Create new Timedelta from parts return create_timedelta_from_parts( diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 3bf5d3dbead87..114334cb69414 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -666,38 +666,44 @@ def test_replace_timedelta(self): assert replaced == expected, f"Expected {expected}, but got {replaced}" # Test replacing with zero values - replaced = td.replace(days=0, hours=0, minutes=0, seconds=0, milliseconds=0, microseconds=0, nanoseconds=0) + replaced = td.replace(days=0, hours=0, minutes=0, seconds=0, milliseconds=0, + microseconds=0, nanoseconds=0) expected = Timedelta(0) assert replaced == expected, f"Expected {expected}, but got {replaced}" # Test invalid inputs with pytest.raises(TypeError): - td.replace(hours='10') # invalid hours value + td.replace(hours='10') with pytest.raises(TypeError): - td.replace(days=2.5) # invalid days value + td.replace(days=2.5) def test_replace_single_field(self): - td = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, microseconds=125, nanoseconds=60) + td = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, + microseconds=125, nanoseconds=60) # Test replacing only seconds replaced = td.replace(seconds=45) - expected = Timedelta(days=1, hours=5, minutes=45, seconds=45, milliseconds=250, microseconds=125, nanoseconds=60) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=45, milliseconds=250, + microseconds=125, nanoseconds=60) assert replaced == expected, f"Expected {expected}, but got {replaced}" # Test replacing only milliseconds replaced = td.replace(milliseconds=500) - expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=500, microseconds=125, nanoseconds=60) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=500, + microseconds=125, nanoseconds=60) assert replaced == expected, f"Expected {expected}, but got {replaced}" # Test replacing only microseconds replaced = td.replace(microseconds=300) - expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, microseconds=300, nanoseconds=60) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, + microseconds=300, nanoseconds=60) assert replaced == expected, f"Expected {expected}, but got {replaced}" # Test replacing only nanoseconds replaced = td.replace(nanoseconds=500) - expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, microseconds=125, nanoseconds=500) + expected = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, + microseconds=125, nanoseconds=500) assert replaced == expected, f"Expected {expected}, but got {replaced}" @pytest.mark.parametrize( From 0277082c6d8d476fa7ccea74741a864aa7720a65 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sun, 26 May 2024 21:38:24 +0000 Subject: [PATCH 08/11] fix: formatting --- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/accessors.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 49501d7aadc01..c41e078095feb 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -776,7 +776,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: """ pps = periods_per_second(self._creso) return self._maybe_mask_results(self.asi8 / pps, fill_value=None) - + def to_pytimedelta(self) -> npt.NDArray[np.object_]: """ Return an ndarray of datetime.timedelta objects. diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 0205a4c111b5f..20cb38ac084f6 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -519,7 +519,8 @@ def adjusted(self): td.components.microseconds * 1000 + td.components.nanoseconds ) - time_str = f"{td.components.seconds:02d}.{td.components.milliseconds:03d}{td.components.microseconds:03d}{td.components.nanoseconds:03d}" + time_str = f"{td.components.seconds:02d}.{td.components.milliseconds:03d}\ + {td.components.microseconds:03d}{td.components.nanoseconds:03d}" if len(time_str) > max_time_length: max_time_length = len(time_str) @@ -533,10 +534,12 @@ def adjusted(self): microseconds = td.components.microseconds nanoseconds = td.components.nanoseconds - combined_time_str = f"{seconds:02d}.{milliseconds:03d}{microseconds:03d}{nanoseconds:03d}" + combined_time_str = f"{seconds:02d}.{milliseconds:03d}{microseconds:03d}\ + {nanoseconds:03d}" combined_time_str = combined_time_str.ljust(max_time_length, '0') - formatted_td.append(f"{days:>{max_days_length}} days {hours:02d}:{minutes:02d}:{combined_time_str}") + formatted_td.append(f"{days:>{max_days_length}} days {hours:02d}:{minutes:02d}\ + :{combined_time_str}") return pd.Series(formatted_td) From 850c8725edaef603486c40ddea5c2a93c4b1ccfd Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sun, 26 May 2024 21:47:29 +0000 Subject: [PATCH 09/11] fix: formatting --- pandas/_libs/tslibs/timedeltas.pyx | 4 +--- pandas/core/indexes/accessors.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7aace63e19b78..855c0a4b08dbb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -377,6 +377,7 @@ cdef create_timedelta_from_parts( Convenience routine to construct a Timedelta from its parts """ + # Get total Timedelta time in nanoseconds total_nanoseconds = ( days * 24 * 3600 * 1_000_000_000 + hours * 3600 * 1_000_000_000 + @@ -389,7 +390,6 @@ cdef create_timedelta_from_parts( return _timedelta_from_value_and_reso(Timedelta, total_nanoseconds, NPY_FR_ns) - cdef _maybe_cast_from_unit(ts, str unit): # caller is responsible for checking # assert unit not in ["Y", "y", "M"] @@ -981,7 +981,6 @@ cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso): _Timedelta td_base assert value != NPY_NAT - # For millisecond and second resos, we cannot actually pass int(value) because # many cases would fall outside of the pytimedelta implementation bounds. # We pass 0 instead, and override seconds, microseconds, days. @@ -1311,7 +1310,6 @@ cdef class _Timedelta(timedelta): nanoseconds=nanoseconds ) - @property def unit(self) -> str: """ diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 20cb38ac084f6..d39604759e8d0 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -506,7 +506,7 @@ def adjusted(self): import pandas as pd max_days_length = 0 - max_time_length = 0 + max_seconds_length = 0 for td in self._get_values(): days_length = len(str(td.components.days)) @@ -521,8 +521,8 @@ def adjusted(self): ) time_str = f"{td.components.seconds:02d}.{td.components.milliseconds:03d}\ {td.components.microseconds:03d}{td.components.nanoseconds:03d}" - if len(time_str) > max_time_length: - max_time_length = len(time_str) + if len(time_str) > max_seconds_length: + max_seconds_length = len(time_str) formatted_td = [] for td in self._get_values(): @@ -534,16 +534,17 @@ def adjusted(self): microseconds = td.components.microseconds nanoseconds = td.components.nanoseconds - combined_time_str = f"{seconds:02d}.{milliseconds:03d}{microseconds:03d}\ + # Construct combined time string with leading zeros + seconds_str = f"{seconds:02d}.{milliseconds:03d}{microseconds:03d}\ {nanoseconds:03d}" - combined_time_str = combined_time_str.ljust(max_time_length, '0') + seconds_str = seconds_str.ljust(max_seconds_length, '0') + # Format timedelta string with aligned days and padded time components formatted_td.append(f"{days:>{max_days_length}} days {hours:02d}:{minutes:02d}\ - :{combined_time_str}") + :{seconds_str}") return pd.Series(formatted_td) - @delegate_names( delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property" ) From b62e5a7053aba37141ee9e049b0bf109b19b0e67 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Sun, 26 May 2024 21:52:11 +0000 Subject: [PATCH 10/11] test: add test --- pandas/_libs/tslibs/timedeltas.pyx | 1 - pandas/tests/scalar/timedelta/test_timedelta.py | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 855c0a4b08dbb..8868b4eab75fb 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -369,7 +369,6 @@ cdef convert_to_timedelta64(object ts, str unit): raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}") return ts.astype("timedelta64[ns]") - cdef create_timedelta_from_parts( int64_t days=0, int64_t hours=0, int64_t minutes=0, int64_t seconds=0, int64_t milliseconds=0, int64_t microseconds=0, int64_t nanoseconds=0): diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 114334cb69414..702b11e0ccecf 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -682,6 +682,12 @@ def test_replace_single_field(self): td = Timedelta(days=1, hours=5, minutes=45, seconds=30, milliseconds=250, microseconds=125, nanoseconds=60) + # Test replacing only days + replaced = td.replace(days=2) + expected = Timedelta(days=2, hours=5, minutes=45, seconds=30, milliseconds=250, + microseconds=125, nanoseconds=60) + assert replaced == expected, f"Expected {expected}, but got {replaced}" + # Test replacing only seconds replaced = td.replace(seconds=45) expected = Timedelta(days=1, hours=5, minutes=45, seconds=45, milliseconds=250, From 7c70748a37ce812345025241a75c0d4dba49ef56 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Mon, 27 May 2024 16:04:36 +0000 Subject: [PATCH 11/11] Implement replace and adjusted for timedelta #57188 Co-authored by: Sofia Simas --- pandas/core/indexes/accessors.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index d39604759e8d0..e7d2813a0df60 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -519,8 +519,12 @@ def adjusted(self): td.components.microseconds * 1000 + td.components.nanoseconds ) - time_str = f"{td.components.seconds:02d}.{td.components.milliseconds:03d}\ - {td.components.microseconds:03d}{td.components.nanoseconds:03d}" + time_str = ( + f"{td.components.seconds:02d}." + f"{td.components.milliseconds:03d}" + f"{td.components.microseconds:03d}" + f"{td.components.nanoseconds:03d}" + ) if len(time_str) > max_seconds_length: max_seconds_length = len(time_str) @@ -535,13 +539,20 @@ def adjusted(self): nanoseconds = td.components.nanoseconds # Construct combined time string with leading zeros - seconds_str = f"{seconds:02d}.{milliseconds:03d}{microseconds:03d}\ - {nanoseconds:03d}" + seconds_str = ( + f"{seconds:02d}." + f"{milliseconds:03d}" + f"{microseconds:03d}" + f"{nanoseconds:03d}" + ) seconds_str = seconds_str.ljust(max_seconds_length, '0') # Format timedelta string with aligned days and padded time components - formatted_td.append(f"{days:>{max_days_length}} days {hours:02d}:{minutes:02d}\ - :{seconds_str}") + formatted_td.append( + f"{days:>{max_days_length}} days " + f"{hours:02d}:{minutes:02d}:" + f"{seconds_str}" + ) return pd.Series(formatted_td)