diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 188f5678bbbba..a167172d0a79f 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -595,7 +595,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.tseries.offsets.Day.freqstr SA01" \ -i "pandas.tseries.offsets.Day.is_on_offset GL08" \ -i "pandas.tseries.offsets.Day.n GL08" \ - -i "pandas.tseries.offsets.Day.nanos SA01" \ + -i "pandas.tseries.offsets.Day.nanos GL08" \ -i "pandas.tseries.offsets.Day.normalize GL08" \ -i "pandas.tseries.offsets.Day.rule_code GL08" \ -i "pandas.tseries.offsets.Easter PR02" \ diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 5daf204f39bcf..12fd86469a848 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -63,7 +63,7 @@ Further, operations among the scalars yield another scalar ``Timedelta``. .. ipython:: python - pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta( + pd.Timedelta(pd.offsets.Hour(48)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta( "00:00:00.000123" ) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 31979b293a940..6e4327137fd18 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -30,6 +30,7 @@ "get_unit_from_dtype", "periods_per_day", "periods_per_second", + "Day", "guess_datetime_format", "add_overflowsafe", "get_supported_dtype", @@ -61,6 +62,7 @@ ) from pandas._libs.tslibs.offsets import ( BaseOffset, + Day, Tick, to_offset, ) diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index 3f942d6aa3622..55dd913ce2771 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -92,6 +92,7 @@ class BaseOffset: def __getstate__(self): ... @property def nanos(self) -> int: ... + def _maybe_to_hours(self) -> BaseOffset: ... def _get_offset(name: str) -> BaseOffset: ... 
@@ -116,7 +117,9 @@ class Tick(SingleConstructorOffset): def delta_to_tick(delta: timedelta) -> Tick: ... -class Day(Tick): ... +class Day(BaseOffset): + def _maybe_to_hours(self) -> Hour: ... + class Hour(Tick): ... class Minute(Tick): ... class Second(Tick): ... diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index a24941e4f0a5a..3d79ed544a3c9 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -776,6 +776,11 @@ cdef class BaseOffset: def nanos(self): raise ValueError(f"{self} is a non-fixed frequency") + def _maybe_to_hours(self): + if not isinstance(self, Day): + return self + return Hour(self.n * 24) + # ------------------------------------------------------------------ def is_month_start(self, _Timestamp ts): @@ -954,8 +959,6 @@ cdef class Tick(SingleConstructorOffset): # Note: Without making this cpdef, we get AttributeError when calling # from __mul__ cpdef Tick _next_higher_resolution(Tick self): - if type(self) is Day: - return Hour(self.n * 24) if type(self) is Hour: return Minute(self.n * 60) if type(self) is Minute: @@ -1102,7 +1105,7 @@ cdef class Tick(SingleConstructorOffset): self.normalize = False -cdef class Day(Tick): +cdef class Day(SingleConstructorOffset): """ Offset ``n`` days. @@ -1132,11 +1135,51 @@ cdef class Day(Tick): >>> ts + Day(-4) Timestamp('2022-12-05 15:00:00') """ + _adjust_dst = True + _attributes = tuple(["n", "normalize"]) _nanos_inc = 24 * 3600 * 1_000_000_000 _prefix = "D" _period_dtype_code = PeriodDtypeCode.D _creso = NPY_DATETIMEUNIT.NPY_FR_D + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n) + if normalize: + # GH#21427 + raise ValueError( + "Day offset with `normalize=True` are not allowed." + ) + + def is_on_offset(self, dt) -> bool: + return True + + @apply_wraps + def _apply(self, other): + if isinstance(other, Day): + # TODO: why isn't this handled in __add__? 
+ return Day(self.n + other.n) + return other + np.timedelta64(self.n, "D") + + def _apply_array(self, dtarr): + return dtarr + np.timedelta64(self.n, "D") + + @cache_readonly + def freqstr(self) -> str: + """ + Return a string representing the frequency. + + Examples + -------- + >>> pd.Day(5).freqstr + '5D' + + >>> pd.offsets.Day(1).freqstr + 'D' + """ + if self.n != 1: + return str(self.n) + "D" + return "D" + cdef class Hour(Tick): """ @@ -1360,16 +1403,13 @@ cdef class Nano(Tick): def delta_to_tick(delta: timedelta) -> Tick: if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: # nanoseconds only for pd.Timedelta - if delta.seconds == 0: - return Day(delta.days) + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(seconds / 3600) + elif seconds % 60 == 0: + return Minute(seconds / 60) else: - seconds = delta.days * 86400 + delta.seconds - if seconds % 3600 == 0: - return Hour(seconds / 3600) - elif seconds % 60 == 0: - return Minute(seconds / 60) - else: - return Second(seconds) + return Second(seconds) else: nanos = delta_to_nanoseconds(delta) if nanos % 1_000_000 == 0: @@ -4838,7 +4878,7 @@ cpdef to_offset(freq, bint is_period=False): <2 * BusinessDays> >>> to_offset(pd.Timedelta(days=1)) - + <24 * Hours> >>> to_offset(pd.offsets.Hour()) @@ -4918,7 +4958,7 @@ cpdef to_offset(freq, bint is_period=False): ) prefix = c_DEPR_ABBREVS[prefix] - if prefix in {"D", "h", "min", "s", "ms", "us", "ns"}: + if prefix in {"h", "min", "s", "ms", "us", "ns"}: # For these prefixes, we have something like "3h" or # "2.5min", so we can construct a Timedelta with the # matching unit and get our offset from delta_to_tick @@ -4936,6 +4976,12 @@ cpdef to_offset(freq, bint is_period=False): if result is None: result = offset + elif isinstance(result, Day) and isinstance(offset, Tick): + # e.g. "1D1H" is treated like "25H" + result = Hour(result.n * 24) + offset + elif isinstance(offset, Day) and isinstance(result, Tick): + # e.g. 
"1H1D" is treated like "25H" + result = result + Hour(offset.n * 24) else: result = result + offset except (ValueError, TypeError) as err: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 023a0f52e320f..4c0a69e119278 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -113,6 +113,7 @@ from pandas._libs.tslibs.offsets cimport ( from pandas._libs.tslibs.offsets import ( INVALID_FREQ_ERR_MSG, BDay, + Day, ) cdef: @@ -1823,6 +1824,10 @@ cdef class _Period(PeriodMixin): # i.e. np.timedelta64("nat") return NaT + if isinstance(other, Day): + # Periods are timezone-naive, so we treat Day as Tick-like + other = np.timedelta64(other.n, "D") + try: inc = delta_to_nanoseconds(other, reso=self._dtype._creso, round_ok=False) except ValueError as err: @@ -1844,7 +1849,7 @@ cdef class _Period(PeriodMixin): @cython.overflowcheck(True) def __add__(self, other): - if is_any_td_scalar(other): + if is_any_td_scalar(other) or isinstance(other, Day): return self._add_timedeltalike_scalar(other) elif is_offset_object(other): return self._add_offset(other) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 4ff2df34ac717..e4e639a0bee26 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -2388,6 +2388,7 @@ cdef bint _should_cast_to_timedelta(object obj): cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? 
-1: from pandas._libs.tslibs.offsets import to_offset - freq = to_offset(freq) + # In this context it is unambiguous that "D" represents 24 hours + freq = to_offset(freq)._maybe_to_hours() freq.nanos # raises on non-fixed freq return delta_to_nanoseconds(freq, creso) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 673001337767b..c696f48f0fdc7 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -28,6 +28,7 @@ ) from pandas._libs.tslibs import ( BaseOffset, + Day, IncompatibleFrequency, NaT, NaTType, @@ -904,14 +905,21 @@ def inferred_freq(self) -> str | None: TimedeltaIndex(['0 days', '10 days', '20 days'], dtype='timedelta64[ns]', freq=None) >>> tdelta_idx.inferred_freq - '10D' + '240h' """ if self.ndim != 1: return None try: - return frequencies.infer_freq(self) + res = frequencies.infer_freq(self) except ValueError: return None + if self.dtype.kind == "m" and res is not None and res.endswith("D"): + # TimedeltaArray freq must be a Tick, so we convert the inferred + # daily freq to hourly. + if res == "D": + return "24h" + res = str(int(res[:-1]) * 24) + "h" + return res @property # NB: override with cache_readonly in immutable subclasses def _resolution_obj(self) -> Resolution | None: @@ -1052,6 +1060,10 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None: elif isinstance(self.freq, Tick): # In these cases return self.freq + elif isinstance(self.freq, Day) and getattr(self, "tz", None) is None: + return self.freq + # TODO: are there tzaware cases when we can reliably preserve freq? + # We have a bunch of tests that seem to think so return None @final @@ -1147,6 +1159,10 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray: res_m8 = res_values.view(f"timedelta64[{self.unit}]") new_freq = self._get_arithmetic_result_freq(other) + if new_freq is not None: + # TODO: are we sure this is right? 
+ new_freq = new_freq._maybe_to_hours() + new_freq = cast("Tick | None", new_freq) return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq) @@ -1988,6 +2004,8 @@ def _maybe_pin_freq(self, freq, validate_kwds: dict) -> None: # We cannot inherit a freq from the data, so we need to validate # the user-passed freq freq = to_offset(freq) + if self.dtype.kind == "m": + freq = freq._maybe_to_hours() type(self)._validate_frequency(self, freq, **validate_kwds) self._freq = freq else: @@ -2237,6 +2255,9 @@ def _with_freq(self, freq) -> Self: assert freq == "infer" freq = to_offset(self.inferred_freq) + if self.dtype.kind == "m" and freq is not None: + assert isinstance(freq, Tick) + arr = self.view() arr._freq = freq return arr diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 077bde35a4c94..539323f92581a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -473,8 +473,10 @@ def _generate_range( if end is not None: end = end.tz_localize(None) - if isinstance(freq, Tick): - i8values = generate_regular_range(start, end, periods, freq, unit=unit) + if isinstance(freq, Tick) or (tz is None and isinstance(freq, Day)): + i8values = generate_regular_range( + start, end, periods, freq._maybe_to_hours(), unit=unit + ) else: xdr = _generate_range( start=start, end=end, periods=periods, offset=freq, unit=unit @@ -934,7 +936,14 @@ def tz_convert(self, tz) -> Self: # No conversion since timestamps are all UTC to begin with dtype = tz_to_dtype(tz, unit=self.unit) - return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) + new_freq = self.freq + if self.freq is not None and self.freq._adjust_dst: + # TODO: in some cases we may be able to retain, e.g. if old and new + # tz are both fixed offsets, or if no DST-crossings occur. + # The latter is value-dependent behavior that we may want to avoid. + # Or could convert e.g. 
"D" to "24h", see GH#51716 + new_freq = None + return self._simple_new(self._ndarray, dtype=dtype, freq=new_freq) @dtl.ravel_compat def tz_localize( diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8baf363b909fb..17e3685085b1a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -22,6 +22,7 @@ from pandas._libs.arrays import NDArrayBacked from pandas._libs.tslibs import ( BaseOffset, + Day, NaT, NaTType, Timedelta, @@ -855,6 +856,9 @@ def _addsub_int_array_or_scalar( def _add_offset(self, other: BaseOffset): assert not isinstance(other, Tick) + if isinstance(other, Day): + return self + np.timedelta64(other.n, "D") + self._require_matching_freq(other, base=True) return self._addsub_int_array_or_scalar(other.n, operator.add) @@ -869,7 +873,7 @@ def _add_timedeltalike_scalar(self, other): ------- PeriodArray """ - if not isinstance(self.freq, Tick): + if not isinstance(self.freq, (Tick, Day)): # We cannot add timedelta-like to non-tick PeriodArray raise raise_on_incompatible(self, other) @@ -877,7 +881,10 @@ def _add_timedeltalike_scalar(self, other): # i.e. np.timedelta64("NaT") return super()._add_timedeltalike_scalar(other) - td = np.asarray(Timedelta(other).asm8) + if isinstance(other, Day): + td = np.asarray(Timedelta(days=other.n).asm8) + else: + td = np.asarray(Timedelta(other).asm8) return self._add_timedelta_arraylike(td) def _add_timedelta_arraylike( diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 865e81d7754ef..4ced74f57fbc1 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -14,6 +14,7 @@ tslibs, ) from pandas._libs.tslibs import ( + Day, NaT, NaTType, Tick, @@ -256,6 +257,12 @@ def _from_sequence_not_strict( assert unit not in ["Y", "y", "M"] # caller is responsible for checking + if isinstance(freq, Day): + raise ValueError( + "Day offset object is not valid for TimedeltaIndex, " + "pass e.g. 24H instead." 
+ ) + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) if dtype is not None: @@ -274,6 +281,9 @@ def _generate_range( if freq is None and any(x is None for x in [periods, start, end]): raise ValueError("Must provide freq argument if no data is supplied") + if isinstance(freq, Day): + raise TypeError("TimedeltaArray/Index freq must be a Tick or None") + if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, " diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 7e8d808769bc1..c911d6e466347 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -24,6 +24,7 @@ ) from pandas._libs.tslibs import ( BaseOffset, + Day, Resolution, Tick, parsing, @@ -90,6 +91,7 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex, ABC): _can_hold_strings = False _data: DatetimeArray | TimedeltaArray | PeriodArray + _freq: BaseOffset | None @doc(DatetimeLikeArrayMixin.mean) def mean(self, *, skipna: bool = True, axis: int | None = 0): @@ -585,8 +587,9 @@ def _intersection(self, other: Index, sort: bool = False) -> Index: # At this point we should have result.dtype == self.dtype # and type(result) is type(self._data) result = self._wrap_setop_result(other, result) - return result._with_freq(None)._with_freq("infer") - + result = result._with_freq(None)._with_freq("infer") + result = self._maybe_restore_day(result._data) + return result else: return self._fast_intersect(other, sort) @@ -710,7 +713,18 @@ def _union(self, other, sort): # that result.freq == self.freq return result else: - return super()._union(other, sort)._with_freq("infer") + result = super()._union(other, sort)._with_freq("infer") + return self._maybe_restore_day(result) + + def _maybe_restore_day(self, result: Self) -> Self: + if isinstance(self.freq, Day) and isinstance(result.freq, Tick): + # If we infer a 24H-like freq but are D, restore "D" + td = 
Timedelta(result.freq) + div, mod = divmod(td.value, 24 * 3600 * 10**9) + if mod == 0: + freq = to_offset("D") * div + result._freq = freq + return result # -------------------------------------------------------------------- # Join Methods diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 359cdf880937b..c5d1604c4917d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1230,6 +1230,8 @@ def interval_range( raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err + if isinstance(start, Timedelta) or isinstance(end, Timedelta): + freq = freq._maybe_to_hours() # verify type compatibility if not all( diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index edd1fdd4da943..b0f17f78ba637 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -11,6 +11,7 @@ from pandas._libs import index as libindex from pandas._libs.tslibs import ( BaseOffset, + Day, NaT, Period, Resolution, @@ -347,7 +348,7 @@ def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]: of self.freq. Note IncompatibleFrequency subclasses ValueError. 
""" if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): - if isinstance(self.freq, Tick): + if isinstance(self.freq, (Tick, Day)): # _check_timedeltalike_freq_compat will raise if incompatible delta = self._data._check_timedeltalike_freq_compat(other) return delta diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 29039ffd0217e..015688e08219a 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -9,6 +9,7 @@ lib, ) from pandas._libs.tslibs import ( + Day, Resolution, Timedelta, to_offset, @@ -114,7 +115,7 @@ class TimedeltaIndex(DatetimeTimedeltaMixin): >>> pd.TimedeltaIndex(np.arange(5) * 24 * 3600 * 1e9, freq="infer") TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], - dtype='timedelta64[ns]', freq='D') + dtype='timedelta64[ns]', freq='24h') """ _typ = "timedeltaindex" @@ -292,14 +293,14 @@ def timedelta_range( -------- >>> pd.timedelta_range(start="1 day", periods=4) TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], - dtype='timedelta64[ns]', freq='D') + dtype='timedelta64[ns]', freq='24h') The ``closed`` parameter specifies which endpoint is included. The default behavior is to include both endpoints. >>> pd.timedelta_range(start="1 day", periods=4, closed="right") TimedeltaIndex(['2 days', '3 days', '4 days'], - dtype='timedelta64[ns]', freq='D') + dtype='timedelta64[ns]', freq='24h') The ``freq`` parameter specifies the frequency of the TimedeltaIndex. 
Only fixed frequencies can be passed, non-fixed frequencies such as @@ -322,12 +323,22 @@ def timedelta_range( >>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s") TimedeltaIndex(['1 days', '100001 days', '200001 days'], - dtype='timedelta64[s]', freq='100000D') + dtype='timedelta64[s]', freq='2400000h') """ if freq is None and com.any_none(periods, start, end): - freq = "D" + freq = "24h" + + if isinstance(freq, Day): + # If a user specifically passes a Day *object* we disallow it, + # but if they pass a Day-like string we'll convert it to hourly below. + raise ValueError( + "Passing a Day offset to timedelta_range is not allowed, " + "pass an hourly offset instead" + ) freq = to_offset(freq) + if freq is not None: + freq = freq._maybe_to_hours() tdarr = TimedeltaArray._generate_range( start, end, periods, freq, closed=closed, unit=unit ) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index ccbe25fdae841..b5ec85daca8df 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1903,6 +1903,19 @@ def get_resampler(obj: Series | DataFrame, **kwds) -> Resampler: """ Create a TimeGrouper and return our resampler. """ + freq = kwds.get("freq", None) + if freq is not None: + # TODO: same thing in get_resampler_for_grouping? + axis = kwds.get("axis", 0) + axis = obj._get_axis_number(axis) + ax = obj.axes[axis] + if isinstance(ax, TimedeltaIndex): + # TODO: could disallow/deprecate Day _object_ while still + # allowing "D" string? + freq = to_offset(freq)._maybe_to_hours() + freq = freq._maybe_to_hours() + kwds["freq"] = freq + tg = TimeGrouper(obj, **kwds) # type: ignore[arg-type] return tg._get_resampler(obj) @@ -2230,29 +2243,28 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): f"an instance of {type(ax).__name__}" ) + freq = self.freq._maybe_to_hours() if not isinstance(self.freq, Tick): # GH#51896 raise ValueError( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " - f"e.g. 
'24h' or '3D', not {self.freq}" + f"e.g. '24h' or '72h', not {freq}" ) if not len(ax): - binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name) + binner = labels = TimedeltaIndex(data=[], freq=freq, name=ax.name) return binner, [], labels start, end = ax.min(), ax.max() if self.closed == "right": - end += self.freq + end += freq - labels = binner = timedelta_range( - start=start, end=end, freq=self.freq, name=ax.name - ) + labels = binner = timedelta_range(start=start, end=end, freq=freq, name=ax.name) end_stamps = labels if self.closed == "left": - end_stamps += self.freq + end_stamps += freq bins = ax.searchsorted(end_stamps, side=self.closed) @@ -2441,7 +2453,7 @@ def _get_timestamp_range_edges( ------- A tuple of length 2, containing the adjusted pd.Timestamp objects. """ - if isinstance(freq, Tick): + if isinstance(freq, (Tick, Day)): index_tz = first.tz if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): raise ValueError("The origin must have the same timezone as the index.") @@ -2451,6 +2463,8 @@ def _get_timestamp_range_edges( origin = Timestamp("1970-01-01", tz=index_tz) if isinstance(freq, Day): + # TODO: should we change behavior for next comment now that Day + # respects DST? # _adjust_dates_anchored assumes 'D' means 24h, but first/last # might contain a DST transition (23h, 24h, or 25h). 
# So "pretend" the dates are naive when adjusting the endpoints @@ -2460,7 +2474,15 @@ def _get_timestamp_range_edges( origin = origin.tz_localize(None) first, last = _adjust_dates_anchored( - first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit + first, + last, + # error: Argument 3 to "_adjust_dates_anchored" has incompatible + # type "BaseOffset"; expected "Tick" + freq._maybe_to_hours(), # type: ignore[arg-type] + closed=closed, + origin=origin, + offset=offset, + unit=unit, ) if isinstance(freq, Day): first = first.tz_localize(index_tz) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2243d8dd1a613..96167d1bebe65 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1852,6 +1852,7 @@ def _validate(self) -> None: f"passed window {self.window} is not " "compatible with a datetimelike index" ) from err + if isinstance(self._on, PeriodIndex): # error: Incompatible types in assignment (expression has type # "float", variable has type "Optional[int]") @@ -1859,6 +1860,9 @@ def _validate(self) -> None: self._on.freq.nanos / self._on.freq.n ) else: + # In this context we treat Day as 24H + # TODO: will this cause trouble with tzaware cases? 
+ freq = freq._maybe_to_hours() try: unit = dtype_to_unit(self._on.dtype) # type: ignore[arg-type] except TypeError: diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index f9807310460b4..36c6ad9c6bb54 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -813,6 +813,8 @@ def test_dt64arr_add_timedeltalike_scalar( rng = date_range("2000-01-01", "2000-02-01", tz=tz) expected = date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz) + if tz is not None: + expected = expected._with_freq(None) rng = tm.box_expected(rng, box_with_array) expected = tm.box_expected(expected, box_with_array) @@ -833,6 +835,8 @@ def test_dt64arr_sub_timedeltalike_scalar( rng = date_range("2000-01-01", "2000-02-01", tz=tz) expected = date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz) + if tz is not None: + expected = expected._with_freq(None) rng = tm.box_expected(rng, box_with_array) expected = tm.box_expected(expected, box_with_array) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 1b8ad1922b9d2..ce847c9d2055d 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -290,10 +290,16 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) - result = three_days / index - tm.assert_equal(result, expected) + if isinstance(three_days, pd.offsets.Day): + # GH#41943 Day is no longer timedelta-like + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + three_days / index + else: + result = three_days / index + tm.assert_equal(result, expected) + msg = "cannot use operands with types dtype" - msg = "cannot use operands with types dtype" with pytest.raises(TypeError, match=msg): index / three_days diff --git a/pandas/tests/arithmetic/test_timedelta64.py 
b/pandas/tests/arithmetic/test_timedelta64.py index 4583155502374..5941585dceb72 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -628,32 +628,32 @@ def test_tdi_isub_timedeltalike(self, two_hours, box_with_array): # ------------------------------------------------------------- def test_tdi_ops_attributes(self): - rng = timedelta_range("2 days", periods=5, freq="2D", name="x") + rng = timedelta_range("2 days", periods=5, freq="48h", name="x") result = rng + 1 * rng.freq - exp = timedelta_range("4 days", periods=5, freq="2D", name="x") + exp = timedelta_range("4 days", periods=5, freq="48h", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "2D" + assert result.freq == "48h" result = rng - 2 * rng.freq - exp = timedelta_range("-2 days", periods=5, freq="2D", name="x") + exp = timedelta_range("-2 days", periods=5, freq="48h", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "2D" + assert result.freq == "48h" result = rng * 2 exp = timedelta_range("4 days", periods=5, freq="4D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "4D" + assert result.freq == "96h" result = rng / 2 exp = timedelta_range("1 days", periods=5, freq="D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "D" + assert result.freq == "24h" result = -rng exp = timedelta_range("-2 days", periods=5, freq="-2D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "-2D" + assert result.freq == "-48h" rng = timedelta_range("-2 days", periods=5, freq="D", name="x") @@ -1007,7 +1007,7 @@ def test_td64arr_add_sub_datetimelike_scalar( ts = dt_scalar tdi = timedelta_range("1 day", periods=3) - expected = pd.date_range("2012-01-02", periods=3, tz=tz) + expected = pd.date_range("2012-01-02", periods=3, tz=tz, freq="24h") tdarr = tm.box_expected(tdi, box_with_array) expected = tm.box_expected(expected, box_with_array) @@ -1015,7 +1015,7 @@ def 
test_td64arr_add_sub_datetimelike_scalar( tm.assert_equal(ts + tdarr, expected) tm.assert_equal(tdarr + ts, expected) - expected2 = pd.date_range("2011-12-31", periods=3, freq="-1D", tz=tz) + expected2 = pd.date_range("2011-12-31", periods=3, freq="-24h", tz=tz) expected2 = tm.box_expected(expected2, box_with_array) tm.assert_equal(ts - tdarr, expected2) @@ -1822,6 +1822,16 @@ def test_td64arr_mod_tdscalar( expected = TimedeltaIndex(["1 Day", "2 Days", "0 Days"] * 3) expected = tm.box_expected(expected, box_with_array) + if isinstance(three_days, offsets.Day): + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + tdarr % three_days + with pytest.raises(TypeError, match=msg): + divmod(tdarr, three_days) + with pytest.raises(TypeError, match=msg): + tdarr // three_days + return + result = tdarr % three_days tm.assert_equal(result, expected) @@ -1865,6 +1875,12 @@ def test_td64arr_rmod_tdscalar(self, box_with_array, three_days): expected = TimedeltaIndex(expected) expected = tm.box_expected(expected, box_with_array) + if isinstance(three_days, offsets.Day): + msg = "Cannot divide Day by TimedeltaArray" + with pytest.raises(TypeError, match=msg): + three_days % tdarr + return + result = three_days % tdarr tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3d8f8d791b763..af4c76704b6c9 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -93,9 +93,9 @@ def arr1d(self): """Fixture returning DatetimeArray with daily frequency.""" data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 if self.array_cls is PeriodArray: - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24h") else: - arr = self.index_cls(data, freq="D")._data + arr = self.index_cls(data, freq="24h")._data return arr def test_compare_len1_raises(self, arr1d): diff --git a/pandas/tests/indexes/datetimes/methods/test_round.py 
b/pandas/tests/indexes/datetimes/methods/test_round.py index cde4a3a65804d..68a0f91be4c4f 100644 --- a/pandas/tests/indexes/datetimes/methods/test_round.py +++ b/pandas/tests/indexes/datetimes/methods/test_round.py @@ -193,7 +193,7 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): ) def test_round_int64(self, start, index_freq, periods, round_freq): dt = date_range(start=start, freq=index_freq, periods=periods) - unit = to_offset(round_freq).nanos + unit = to_offset(round_freq)._maybe_to_hours().nanos # test floor result = dt.floor(round_freq) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index 3a7c418b27de6..099be7053d2b4 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -1,7 +1,6 @@ # Arithmetic tests specific to DatetimeIndex are generally about `freq` # rentention or inference. Other arithmetic tests belong in # tests/arithmetic/test_datetime64.py -import pytest from pandas import ( Timedelta, @@ -16,28 +15,30 @@ class TestDatetimeIndexArithmetic: def test_add_timedelta_preserves_freq(self): # GH#37295 should hold for any DTI with freq=None or Tick freq + # In pandas3 "D" preserves time-of-day across DST transitions, so + # is not preserved by subtraction. Ticks offsets like "24h" + # are still preserved tz = "Canada/Eastern" dti = date_range( start=Timestamp("2019-03-26 00:00:00-0400", tz=tz), end=Timestamp("2020-10-17 00:00:00-0400", tz=tz), - freq="D", + freq="24h", ) result = dti + Timedelta(days=1) assert result.freq == dti.freq def test_sub_datetime_preserves_freq(self, tz_naive_fixture): # GH#48818 - dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture) + # In pandas3 "D" preserves time-of-day across DST transitions, so + # is not preserved by subtraction. 
Ticks offsets like "24h" + # are still preserved + dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture, freq="24h") res = dti - dti[0] expected = timedelta_range("0 Days", "11 Days") tm.assert_index_equal(res, expected) assert res.freq == expected.freq - @pytest.mark.xfail( - reason="The inherited freq is incorrect bc dti.freq is incorrect " - "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461" - ) def test_sub_datetime_preserves_freq_across_dst(self): # GH#48818 ts = Timestamp("2016-03-11", tz="US/Pacific") diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py new file mode 100644 index 0000000000000..3bbd31b71c36e --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -0,0 +1,307 @@ +import calendar +from datetime import datetime +import locale +import unicodedata + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + Timedelta, + Timestamp, + date_range, + offsets, +) +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray + +from pandas.tseries.frequencies import to_offset + + +class TestDatetime64: + def test_no_millisecond_field(self): + msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + DatetimeIndex.millisecond + + msg = "'DatetimeIndex' object has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + DatetimeIndex([]).millisecond + + def test_datetimeindex_accessors(self): + dti_naive = date_range(freq="D", start=datetime(1998, 1, 1), periods=365) + # GH#13303 + dti_tz = date_range( + freq="D", start=datetime(1998, 1, 1), periods=365, tz="US/Eastern" + ) + for dti in [dti_naive, dti_tz]: + assert dti.year[0] == 1998 + assert dti.month[0] == 1 + assert dti.day[0] == 1 + assert dti.hour[0] == 0 + assert dti.minute[0] == 0 + assert dti.second[0] == 0 + assert dti.microsecond[0] == 0 + assert dti.dayofweek[0] 
== 3 + + assert dti.dayofyear[0] == 1 + assert dti.dayofyear[120] == 121 + + assert dti.isocalendar().week.iloc[0] == 1 + assert dti.isocalendar().week.iloc[120] == 18 + + assert dti.quarter[0] == 1 + assert dti.quarter[120] == 2 + + assert dti.days_in_month[0] == 31 + assert dti.days_in_month[90] == 30 + + assert dti.is_month_start[0] + assert not dti.is_month_start[1] + assert dti.is_month_start[31] + assert dti.is_quarter_start[0] + assert dti.is_quarter_start[90] + assert dti.is_year_start[0] + assert not dti.is_year_start[364] + assert not dti.is_month_end[0] + assert dti.is_month_end[30] + assert not dti.is_month_end[31] + assert dti.is_month_end[364] + assert not dti.is_quarter_end[0] + assert not dti.is_quarter_end[30] + assert dti.is_quarter_end[89] + assert dti.is_quarter_end[364] + assert not dti.is_year_end[0] + assert dti.is_year_end[364] + + assert len(dti.year) == 365 + assert len(dti.month) == 365 + assert len(dti.day) == 365 + assert len(dti.hour) == 365 + assert len(dti.minute) == 365 + assert len(dti.second) == 365 + assert len(dti.microsecond) == 365 + assert len(dti.dayofweek) == 365 + assert len(dti.dayofyear) == 365 + assert len(dti.isocalendar()) == 365 + assert len(dti.quarter) == 365 + assert len(dti.is_month_start) == 365 + assert len(dti.is_month_end) == 365 + assert len(dti.is_quarter_start) == 365 + assert len(dti.is_quarter_end) == 365 + assert len(dti.is_year_start) == 365 + assert len(dti.is_year_end) == 365 + + dti.name = "name" + + # non boolean accessors -> return Index + for accessor in DatetimeArray._field_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == "name" + + # boolean accessors -> return array + for accessor in DatetimeArray._bool_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res 
= dti[dti.is_leap_year] + exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns") + tm.assert_index_equal(res, exp) + + def test_datetimeindex_accessors2(self): + dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4) + + assert sum(dti.is_quarter_start) == 0 + assert sum(dti.is_quarter_end) == 4 + assert sum(dti.is_year_start) == 0 + assert sum(dti.is_year_end) == 1 + + def test_datetimeindex_accessors3(self): + # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, + bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu") + dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) + msg = "Custom business days is not supported by is_month_start" + with pytest.raises(ValueError, match=msg): + dti.is_month_start + + def test_datetimeindex_accessors4(self): + dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) + + assert dti.is_month_start[0] == 1 + + def test_datetimeindex_accessors5(self): + freq_m = to_offset("ME") + bm = to_offset("BME") + qfeb = to_offset("QE-FEB") + qsfeb = to_offset("QS-FEB") + bq = to_offset("BQE") + bqs_apr = to_offset("BQS-APR") + as_nov = to_offset("YS-NOV") + + tests = [ + (freq_m.is_month_start(Timestamp("2013-06-01")), 1), + (bm.is_month_start(Timestamp("2013-06-01")), 0), + (freq_m.is_month_start(Timestamp("2013-06-03")), 0), + (bm.is_month_start(Timestamp("2013-06-03")), 1), + (qfeb.is_month_end(Timestamp("2013-02-28")), 1), + (qfeb.is_quarter_end(Timestamp("2013-02-28")), 1), + (qfeb.is_year_end(Timestamp("2013-02-28")), 1), + (qfeb.is_month_start(Timestamp("2013-03-01")), 1), + (qfeb.is_quarter_start(Timestamp("2013-03-01")), 1), + (qfeb.is_year_start(Timestamp("2013-03-01")), 1), + (qsfeb.is_month_end(Timestamp("2013-03-31")), 1), + (qsfeb.is_quarter_end(Timestamp("2013-03-31")), 0), + (qsfeb.is_year_end(Timestamp("2013-03-31")), 0), + (qsfeb.is_month_start(Timestamp("2013-02-01")), 1), + (qsfeb.is_quarter_start(Timestamp("2013-02-01")), 1), + 
(qsfeb.is_year_start(Timestamp("2013-02-01")), 1), + (bq.is_month_end(Timestamp("2013-06-30")), 0), + (bq.is_quarter_end(Timestamp("2013-06-30")), 0), + (bq.is_year_end(Timestamp("2013-06-30")), 0), + (bq.is_month_end(Timestamp("2013-06-28")), 1), + (bq.is_quarter_end(Timestamp("2013-06-28")), 1), + (bq.is_year_end(Timestamp("2013-06-28")), 0), + (bqs_apr.is_month_end(Timestamp("2013-06-30")), 0), + (bqs_apr.is_quarter_end(Timestamp("2013-06-30")), 0), + (bqs_apr.is_year_end(Timestamp("2013-06-30")), 0), + (bqs_apr.is_month_end(Timestamp("2013-06-28")), 1), + (bqs_apr.is_quarter_end(Timestamp("2013-06-28")), 1), + (bqs_apr.is_year_end(Timestamp("2013-03-29")), 1), + (as_nov.is_year_start(Timestamp("2013-11-01")), 1), + (as_nov.is_year_end(Timestamp("2013-10-31")), 1), + (Timestamp("2012-02-01").days_in_month, 29), + (Timestamp("2013-02-01").days_in_month, 28), + ] + + for ts, value in tests: + assert ts == value + + def test_datetimeindex_accessors6(self): + # GH 6538: Check that DatetimeIndex and its TimeStamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + expected = [52, 1, 1] + assert dates.isocalendar().week.tolist() == expected + assert [d.weekofyear for d in dates] == expected + + # GH 12806 + # error: Unsupported operand types for + ("List[None]" and "List[str]") + @pytest.mark.parametrize( + "time_locale", + [None] + tm.get_locales(), # type: ignore[operator] + ) + def test_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that sequence + if time_locale is None: + # If the time_locale is None, day-name and month_name should + # return the english attributes + expected_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + expected_months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", +
"August", + "September", + "October", + "November", + "December", + ] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + # GH#11128 + dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365) + english_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + for day, name, eng_name in zip(range(4, 11), expected_days, english_days): + name = name.capitalize() + assert dti.day_name(locale=time_locale)[day] == name + assert dti.day_name(locale=None)[day] == eng_name + ts = Timestamp(datetime(2016, 4, day)) + assert ts.day_name(locale=time_locale) == name + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.day_name(locale=time_locale)[-1]) + ts = Timestamp(pd.NaT) + assert np.isnan(ts.day_name(locale=time_locale)) + + # GH#12805 + dti = date_range(freq="ME", start="2012", end="2013") + result = dti.month_name(locale=time_locale) + expected = Index([month.capitalize() for month in expected_months]) + + # work around different normalization schemes + # https://github.com/pandas-dev/pandas/issues/22342 + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + + tm.assert_index_equal(result, expected) + + for date, expected in zip(dti, expected_months): + result = date.month_name(locale=time_locale) + expected = expected.capitalize() + + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", expected) + + assert result == expected + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.month_name(locale=time_locale)[-1]) + + def test_nanosecond_field(self): + dti = DatetimeIndex(np.arange(10)) + expected = Index(np.arange(10, dtype=np.int32)) + + tm.assert_index_equal(dti.nanosecond, expected) + + +def test_iter_readonly(): + # GH#28055 ints_to_pydatetime with readonly array + arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
+ arr.setflags(write=False) + dti = pd.to_datetime(arr) + list(dti) + + +def test_add_timedelta_preserves_freq(): + # GH#37295 should hold for any DTI with freq=None or Tick freq + # GH#51874 changed this, with tzaware we can no longer retain "D" in addition + tz = "Canada/Eastern" + dti = date_range( + start=Timestamp("2019-03-26 00:00:00-0400", tz=tz), + end=Timestamp("2020-10-17 00:00:00-0400", tz=tz), + freq="D", + ) + result = dti + Timedelta(days=1) + assert result.freq is None diff --git a/pandas/tests/indexes/timedeltas/methods/test_insert.py b/pandas/tests/indexes/timedeltas/methods/test_insert.py index f8164102815f6..cfac475c1f47d 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_insert.py +++ b/pandas/tests/indexes/timedeltas/methods/test_insert.py @@ -136,7 +136,7 @@ def test_insert_empty(self): td = idx[0] result = idx[:0].insert(0, td) - assert result.freq == "D" + assert result.freq == "24h" with pytest.raises(IndexError, match="loc must be an integer between"): result = idx[:0].insert(1, td) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 12ac5dd63bd8c..6e7d411aba807 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -177,7 +177,7 @@ def test_constructor_coverage(self): # non-conforming freq msg = ( "Inferred frequency None from passed values does not conform to " - "passed frequency D" + "passed frequency 24h" ) with pytest.raises(ValueError, match=msg): TimedeltaIndex(["1 days", "2 days", "4 days"], freq="D") diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 607336060cbbc..ebd9757a26af5 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -13,7 +13,7 @@ def test_repr_round_days_non_nano(self): # we should get "1 days", not "1 days 00:00:00" with non-nano 
tdi = TimedeltaIndex(["1 days"], freq="D").as_unit("s") result = repr(tdi) - expected = "TimedeltaIndex(['1 days'], dtype='timedelta64[s]', freq='D')" + expected = "TimedeltaIndex(['1 days'], dtype='timedelta64[s]', freq='24h')" assert result == expected result2 = repr(Series(tdi)) @@ -28,15 +28,17 @@ def test_representation(self, method): idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) - exp1 = "TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')" + exp1 = "TimedeltaIndex([], dtype='timedelta64[ns]', freq='24h')" - exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')" + exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='24h')" - exp3 = "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')" + exp3 = ( + "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='24h')" + ) exp4 = ( "TimedeltaIndex(['1 days', '2 days', '3 days'], " - "dtype='timedelta64[ns]', freq='D')" + "dtype='timedelta64[ns]', freq='24h')" ) exp5 = ( @@ -89,13 +91,13 @@ def test_summary(self): idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) - exp1 = "TimedeltaIndex: 0 entries\nFreq: D" + exp1 = "TimedeltaIndex: 0 entries\nFreq: 24h" - exp2 = "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: D" + exp2 = "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: 24h" - exp3 = "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: D" + exp3 = "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: 24h" - exp4 = "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: D" + exp4 = "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: 24h" exp5 = "TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days 00:00:00" diff --git a/pandas/tests/indexes/timedeltas/test_freq_attr.py b/pandas/tests/indexes/timedeltas/test_freq_attr.py index 1912c49d3000f..f497595e584cb 100644 --- 
a/pandas/tests/indexes/timedeltas/test_freq_attr.py +++ b/pandas/tests/indexes/timedeltas/test_freq_attr.py @@ -4,7 +4,6 @@ from pandas.tseries.offsets import ( DateOffset, - Day, Hour, MonthEnd, ) @@ -12,7 +11,7 @@ class TestFreq: @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) - @pytest.mark.parametrize("freq", ["2D", Day(2), "48h", Hour(48)]) + @pytest.mark.parametrize("freq", ["48h", Hour(48)]) def test_freq_setter(self, values, freq): # GH#20678 idx = TimedeltaIndex(values) @@ -42,11 +41,11 @@ def test_freq_setter_errors(self): # setting with an incompatible freq msg = ( - "Inferred frequency 2D from passed values does not conform to " - "passed frequency 5D" + "Inferred frequency 48h from passed values does not conform to " + "passed frequency 120h" ) with pytest.raises(ValueError, match=msg): - idx._data.freq = "5D" + idx._data.freq = "120h" # setting with a non-fixed frequency msg = r"<2 \* BusinessDays> is a non-fixed frequency" @@ -61,12 +60,12 @@ def test_freq_view_safe(self): # Setting the freq for one TimedeltaIndex shouldn't alter the freq # for another that views the same data - tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D") + tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="48h") tda = tdi._data tdi2 = TimedeltaIndex(tda)._with_freq(None) assert tdi2.freq is None # Original was not altered - assert tdi.freq == "2D" - assert tda.freq == "2D" + assert tdi.freq == "48h" + assert tda.freq == "48h" diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index f6013baf86edc..ed433e42a5e0f 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -11,4 +11,12 @@ def test_infer_freq(self, freq_sample): idx = timedelta_range("1", freq=freq_sample, periods=10) result = TimedeltaIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) - assert result.freq == freq_sample + + if freq_sample == "D": + 
assert result.freq == "24h" + elif freq_sample == "3D": + assert result.freq == "72h" + elif freq_sample == "-3D": + assert result.freq == "-72h" + else: + assert result.freq == freq_sample diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index fce10d9176d74..d3a19ac9ea375 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -90,7 +90,7 @@ def test_union_freq_infer(self): result = left.union(right) tm.assert_index_equal(result, tdi) - assert result.freq == "D" + assert result.freq == "24h" def test_intersection_bug_1708(self): index_1 = timedelta_range("1 day", periods=4, freq="h") diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 1b645e2bc607f..5d74baba3decb 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -31,7 +31,9 @@ def test_timedelta_range(self): result = timedelta_range("0 days", "10 days", freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day() + expected = ( + to_timedelta(np.arange(5), unit="D") + Second(2) + Day()._maybe_to_hours() + ) result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D") tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 4d232d5ed1312..0eebe5b7bc336 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -350,9 +350,9 @@ def test_partial_setting2(self): columns=["A", "B", "C", "D"], ) - expected = pd.concat( - [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True - ) + exp_index = dates[-1:] + dates.freq + exp_index.freq = dates.freq + expected = pd.concat([df_orig, DataFrame({"A": 7}, index=exp_index)], sort=True) df = 
df_orig.copy() df.loc[dates[-1] + dates.freq, "A"] = 7 tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index f4ea6b1d3f3de..412b527499f25 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -191,7 +191,7 @@ def test_resample_empty_series(freq, index, resample_method): if freq == "ME" and isinstance(ser.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " - "e.g. '24h' or '3D', not " + "e.g. '24h' or '72h', not " ) with pytest.raises(ValueError, match=msg): ser.resample(freq) @@ -269,7 +269,7 @@ def test_resample_count_empty_series(freq, index, resample_method): if freq == "ME" and isinstance(ser.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " - "e.g. '24h' or '3D', not " + "e.g. '24h' or '72h', not " ) with pytest.raises(ValueError, match=msg): ser.resample(freq) @@ -305,7 +305,7 @@ def test_resample_empty_dataframe(index, freq, resample_method): if freq == "ME" and isinstance(df.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " - "e.g. '24h' or '3D', not " + "e.g. '24h' or '72h', not " ) with pytest.raises(ValueError, match=msg): df.resample(freq, group_keys=False) @@ -353,7 +353,7 @@ def test_resample_count_empty_dataframe(freq, index): if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " - "e.g. '24h' or '3D', not " + "e.g. '24h' or '72h', not " ) with pytest.raises(ValueError, match=msg): empty_frame_dti.resample(freq) @@ -389,7 +389,7 @@ def test_resample_size_empty_dataframe(freq, index): if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " - "e.g. '24h' or '3D', not " + "e.g. 
'24h' or '72h', not " ) with pytest.raises(ValueError, match=msg): empty_frame_dti.resample(freq) @@ -461,7 +461,7 @@ def test_apply_to_empty_series(index, freq): if freq == "ME" and isinstance(ser.index, TimedeltaIndex): msg = ( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " - "e.g. '24h' or '3D', not " + "e.g. '24h' or '72h', not " ) with pytest.raises(ValueError, match=msg): ser.resample(freq) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 7f37ca6831faa..0c097c1cf0777 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -880,7 +880,7 @@ def test_resample_origin_epoch_with_tz_day_vs_24h(unit): result_1 = ts_1.resample("D", origin="epoch").mean() result_2 = ts_1.resample("24h", origin="epoch").mean() - tm.assert_series_equal(result_1, result_2) + tm.assert_series_equal(result_1, result_2, check_freq=False) # check that we have the same behavior with epoch even if we are not timezone aware ts_no_tz = ts_1.tz_localize(None) @@ -1834,9 +1834,17 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit): dti = date_range("1991-09-05", "1991-09-12", freq=freq1).as_unit(unit) ser = Series(range(len(dti)), index=dti) + if freq2 == "D" and n2 % 1 != 0: + msg = "Invalid frequency: (0.25|0.5|0.75|1.0|1.5)D" + with pytest.raises(ValueError, match=msg): + ser.resample(str(n2_) + freq2) + return + result1 = ser.resample(str(n1_) + freq1).mean() result2 = ser.resample(str(n2_) + freq2).mean() - tm.assert_series_equal(result1, result2) + assert result1.index.freq == str(n1_) + freq1 + assert result2.index.freq == str(n2_) + freq2 + tm.assert_series_equal(result1, result2, check_freq=False) @pytest.mark.parametrize( diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index a4e27ad46c59c..86d73c97c0198 100644 --- a/pandas/tests/resample/test_period_index.py +++ 
b/pandas/tests/resample/test_period_index.py @@ -907,6 +907,7 @@ def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): result = result.to_timestamp(end_freq) expected = ser.to_timestamp().resample(end_freq, offset=offset).mean() + result.index._data._freq = result.index.freq._maybe_to_hours() tm.assert_series_equal(result, expected) def test_resample_with_offset_month(self): diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index bf1f6bd34b171..5f977f5ebaa60 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -748,7 +748,7 @@ def test_resample_agg_readonly(): arr.setflags(write=False) ser = Series(arr, index=index) - rs = ser.resample("1D") + rs = ser.resample("24h") expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24]) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 309810b656ed3..f400fddcd33df 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -215,5 +215,5 @@ def test_arrow_duration_resample(): # GH 56371 idx = pd.Index(timedelta_range("1 day", periods=5), dtype="duration[ns][pyarrow]") expected = Series(np.arange(5, dtype=np.float64), index=idx) - result = expected.resample("1D").mean() + result = expected.resample("24h").mean() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 49bd48b40e67a..f5ab43d2c1d1e 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -571,7 +571,7 @@ def test_period_cons_combined(self): with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq="-1h1D") - msg = "Frequency must be positive, because it represents span: 0D" + msg = "Frequency must be positive, because it represents span: 0h" with pytest.raises(ValueError, match=msg): 
Period("2011-01", freq="0D0h") with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 5509216f4daf4..e0e6e4fae66bb 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -245,7 +245,13 @@ def test_from_tick_reso(): assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Day() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + msg = ( + "Value must be Timedelta, string, integer, float, timedelta or " + "convertible, not Day" + ) + with pytest.raises(ValueError, match=msg): + # TODO: should be TypeError? + Timedelta(tick) def test_construction(): diff --git a/pandas/tests/scalar/timestamp/methods/test_round.py b/pandas/tests/scalar/timestamp/methods/test_round.py index 2fb0e1a8d3397..3d3bf82cb23cf 100644 --- a/pandas/tests/scalar/timestamp/methods/test_round.py +++ b/pandas/tests/scalar/timestamp/methods/test_round.py @@ -246,7 +246,7 @@ def test_round_int64(self, timestamp, freq): # check that all rounding modes are accurate to int64 precision # see GH#22591 dt = Timestamp(timestamp).as_unit("ns") - unit = to_offset(freq).nanos + unit = to_offset(freq)._maybe_to_hours().nanos # test floor result = dt.floor(freq) diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index 16b7190753ee2..e854ba28fa285 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -28,7 +28,6 @@ def test_get_to_timestamp_base(freqstr, exp_freqstr): ((1.04, "h"), (3744, "s")), ((1, "D"), (1, "D")), ((0.342931, "h"), (1234551600, "us")), - ((1.2345, "D"), (106660800, "ms")), ], ) def test_resolution_bumping(args, expected): diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index 
8ff80536fc69e..eca3ed97b398d 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -209,7 +209,7 @@ def test_springforward_singular(self, performance_warning): QuarterEnd: ["11/2/2012", "12/31/2012"], BQuarterBegin: ["11/2/2012", "12/3/2012"], BQuarterEnd: ["11/2/2012", "12/31/2012"], - Day: ["11/4/2012", "11/4/2012 23:00"], + Day: ["11/4/2012", "11/5/2012"], }.items() @pytest.mark.parametrize("tup", offset_classes) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 1e5bfa6033216..86a65357d9731 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -47,6 +47,7 @@ CustomBusinessMonthBegin, CustomBusinessMonthEnd, DateOffset, + Day, Easter, FY5253Quarter, LastWeekOfMonth, @@ -240,7 +241,7 @@ def test_offset_freqstr(self, offset_types): assert offset.rule_code == code def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False): - if normalize and issubclass(offset, Tick): + if normalize and issubclass(offset, (Tick, Day)): # normalize=True disallowed for Tick subclasses GH#21427 return @@ -452,7 +453,7 @@ def test_is_on_offset(self, offset_types, expecteds): assert offset_s.is_on_offset(dt) # when normalize=True, is_on_offset checks time is 00:00:00 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, Day)): # normalize=True disallowed for Tick subclasses GH#21427 return offset_n = _create_offset(offset_types, normalize=True) @@ -484,7 +485,7 @@ def test_add(self, offset_types, tz_naive_fixture, expecteds): assert result == expected_localize # normalize=True, disallowed for Tick subclasses GH#21427 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, Day)): return offset_s = _create_offset(offset_types, normalize=True) expected = Timestamp(expected.date()) diff --git a/pandas/tests/tseries/offsets/test_ticks.py 
b/pandas/tests/tseries/offsets/test_ticks.py index f91230e1460c4..bd4888735269d 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -54,7 +54,7 @@ def test_delta_to_tick(): delta = timedelta(3) tick = delta_to_tick(delta) - assert tick == offsets.Day(3) + assert tick == offsets.Hour(72) td = Timedelta(nanoseconds=5) tick = delta_to_tick(td) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 42d055326c2a5..89835ff4b7694 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -29,6 +29,7 @@ def test_namespace(): "NaTType", "iNaT", "nat_strings", + "Day", "OutOfBoundsDatetime", "OutOfBoundsTimedelta", "Period", diff --git a/pandas/tests/tslibs/test_to_offset.py b/pandas/tests/tslibs/test_to_offset.py index 07bdfca8f2f2d..0c68276c51335 100644 --- a/pandas/tests/tslibs/test_to_offset.py +++ b/pandas/tests/tslibs/test_to_offset.py @@ -141,7 +141,7 @@ def test_to_offset_leading_plus(freqstr, expected): ({"days": -1, "seconds": 1}, offsets.Second(-86399)), ({"hours": 1, "minutes": 10}, offsets.Minute(70)), ({"hours": 1, "minutes": -10}, offsets.Minute(50)), - ({"weeks": 1}, offsets.Day(7)), + ({"weeks": 1}, offsets.Hour(168)), ({"hours": 1}, offsets.Hour(1)), ({"hours": 1}, to_offset("60min")), ({"microseconds": 1}, offsets.Micro(1)),