Skip to content

Commit 0199d58

Browse files
authored
REF: de-duplicate freq pinning/validation (pandas-dev#55987)
* REF: de-duplicate freq pinning/validation * simplify, mypy fixup * update docstring
1 parent 120db44 commit 0199d58

File tree

3 files changed

+71
-73
lines changed

3 files changed

+71
-73
lines changed

pandas/core/arrays/datetimelike.py

+42-10
Original file line numberDiff line numberDiff line change
@@ -1938,7 +1938,7 @@ def __init__(
19381938
freq = values.freq
19391939
elif freq and values.freq:
19401940
freq = to_offset(freq)
1941-
freq, _ = validate_inferred_freq(freq, values.freq, False)
1941+
freq = _validate_inferred_freq(freq, values.freq)
19421942

19431943
if dtype is not None and dtype != values.dtype:
19441944
# TODO: we only have tests for this for DTA, not TDA (2022-07-01)
@@ -2025,6 +2025,39 @@ def freq(self, value) -> None:
20252025

20262026
self._freq = value
20272027

2028+
@final
2029+
def _maybe_pin_freq(self, freq, validate_kwds: dict):
2030+
"""
2031+
Constructor helper to pin the appropriate `freq` attribute. Assumes
2032+
that self._freq is currently set to any freq inferred in
2033+
_from_sequence_not_strict.
2034+
"""
2035+
if freq is None:
2036+
# user explicitly passed None -> override any inferred_freq
2037+
self._freq = None
2038+
elif freq == "infer":
2039+
# if self._freq is *not* None then we already inferred a freq
2040+
# and there is nothing left to do
2041+
if self._freq is None:
2042+
# Set _freq directly to bypass duplicative _validate_frequency
2043+
# check.
2044+
self._freq = to_offset(self.inferred_freq)
2045+
elif freq is lib.no_default:
2046+
# user did not specify anything, keep inferred freq if the original
2047+
# data had one, otherwise do nothing
2048+
pass
2049+
elif self._freq is None:
2050+
# We cannot inherit a freq from the data, so we need to validate
2051+
# the user-passed freq
2052+
freq = to_offset(freq)
2053+
type(self)._validate_frequency(self, freq, **validate_kwds)
2054+
self._freq = freq
2055+
else:
2056+
# Otherwise we just need to check that the user-passed freq
2057+
# doesn't conflict with the one we already have.
2058+
freq = to_offset(freq)
2059+
_validate_inferred_freq(freq, self._freq)
2060+
20282061
@final
20292062
@classmethod
20302063
def _validate_frequency(cls, index, freq: BaseOffset, **kwargs):
@@ -2353,7 +2386,9 @@ def _is_dates_only(self) -> bool:
23532386
# Shared Constructor Helpers
23542387

23552388

2356-
def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
2389+
def ensure_arraylike_for_datetimelike(
2390+
data, copy: bool, cls_name: str
2391+
) -> tuple[ArrayLike, bool]:
23572392
if not hasattr(data, "dtype"):
23582393
# e.g. list, tuple
23592394
if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
@@ -2426,9 +2461,9 @@ def validate_periods(periods: int | float | None) -> int | None:
24262461
return periods
24272462

24282463

2429-
def validate_inferred_freq(
2430-
freq, inferred_freq, freq_infer
2431-
) -> tuple[BaseOffset | None, bool]:
2464+
def _validate_inferred_freq(
2465+
freq: BaseOffset | None, inferred_freq: BaseOffset | None
2466+
) -> BaseOffset | None:
24322467
"""
24332468
If the user passes a freq and another freq is inferred from passed data,
24342469
require that they match.
@@ -2437,12 +2472,10 @@ def validate_inferred_freq(
24372472
----------
24382473
freq : DateOffset or None
24392474
inferred_freq : DateOffset or None
2440-
freq_infer : bool
24412475
24422476
Returns
24432477
-------
24442478
freq : DateOffset or None
2445-
freq_infer : bool
24462479
24472480
Notes
24482481
-----
@@ -2458,12 +2491,11 @@ def validate_inferred_freq(
24582491
)
24592492
if freq is None:
24602493
freq = inferred_freq
2461-
freq_infer = False
24622494

2463-
return freq, freq_infer
2495+
return freq
24642496

24652497

2466-
def maybe_infer_freq(freq):
2498+
def maybe_infer_freq(freq) -> tuple[BaseOffset | None, bool]:
24672499
"""
24682500
Comparing a DateOffset to the string "infer" raises, so we need to
24692501
be careful about comparisons. Make a dummy variable `freq_infer` to

pandas/core/arrays/datetimes.py

+24-39
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
from collections.abc import Iterator
7777

7878
from pandas._typing import (
79+
ArrayLike,
7980
DateTimeErrorChoices,
8081
DtypeObj,
8182
IntervalClosedType,
@@ -327,13 +328,10 @@ def _from_sequence_not_strict(
327328
dayfirst: bool = False,
328329
yearfirst: bool = False,
329330
ambiguous: TimeAmbiguous = "raise",
330-
):
331+
) -> Self:
331332
"""
332333
A non-strict version of _from_sequence, called from DatetimeIndex.__new__.
333334
"""
334-
explicit_none = freq is None
335-
freq = freq if freq is not lib.no_default else None
336-
freq, freq_infer = dtl.maybe_infer_freq(freq)
337335

338336
# if the user either explicitly passes tz=None or a tz-naive dtype, we
339337
# disallow inferring a tz.
@@ -349,13 +347,16 @@ def _from_sequence_not_strict(
349347

350348
unit = None
351349
if dtype is not None:
352-
if isinstance(dtype, np.dtype):
353-
unit = np.datetime_data(dtype)[0]
354-
else:
355-
# DatetimeTZDtype
356-
unit = dtype.unit
350+
unit = dtl.dtype_to_unit(dtype)
351+
352+
data, copy = dtl.ensure_arraylike_for_datetimelike(
353+
data, copy, cls_name="DatetimeArray"
354+
)
355+
inferred_freq = None
356+
if isinstance(data, DatetimeArray):
357+
inferred_freq = data.freq
357358

358-
subarr, tz, inferred_freq = _sequence_to_dt64(
359+
subarr, tz = _sequence_to_dt64(
359360
data,
360361
copy=copy,
361362
tz=tz,
@@ -372,26 +373,15 @@ def _from_sequence_not_strict(
372373
"Use obj.tz_localize(None) instead."
373374
)
374375

375-
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
376-
if explicit_none:
377-
freq = None
378-
379376
data_unit = np.datetime_data(subarr.dtype)[0]
380377
data_dtype = tz_to_dtype(tz, data_unit)
381-
result = cls._simple_new(subarr, freq=freq, dtype=data_dtype)
378+
result = cls._simple_new(subarr, freq=inferred_freq, dtype=data_dtype)
382379
if unit is not None and unit != result.unit:
383380
# If unit was specified in user-passed dtype, cast to it here
384381
result = result.as_unit(unit)
385382

386-
if inferred_freq is None and freq is not None:
387-
# this condition precludes `freq_infer`
388-
cls._validate_frequency(result, freq, ambiguous=ambiguous)
389-
390-
elif freq_infer:
391-
# Set _freq directly to bypass duplicative _validate_frequency
392-
# check.
393-
result._freq = to_offset(result.inferred_freq)
394-
383+
validate_kwds = {"ambiguous": ambiguous}
384+
result._maybe_pin_freq(freq, validate_kwds)
395385
return result
396386

397387
# error: Signature of "_generate_range" incompatible with supertype
@@ -2180,7 +2170,7 @@ def std(
21802170

21812171

21822172
def _sequence_to_dt64(
2183-
data,
2173+
data: ArrayLike,
21842174
*,
21852175
copy: bool = False,
21862176
tz: tzinfo | None = None,
@@ -2192,7 +2182,8 @@ def _sequence_to_dt64(
21922182
"""
21932183
Parameters
21942184
----------
2195-
data : list-like
2185+
data : np.ndarray or ExtensionArray
2186+
dtl.ensure_arraylike_for_datetimelike has already been called.
21962187
copy : bool, default False
21972188
tz : tzinfo or None, default None
21982189
dayfirst : bool, default False
@@ -2209,21 +2200,11 @@ def _sequence_to_dt64(
22092200
Where `unit` is "ns" unless specified otherwise by `out_unit`.
22102201
tz : tzinfo or None
22112202
Either the user-provided tzinfo or one inferred from the data.
2212-
inferred_freq : Tick or None
2213-
The inferred frequency of the sequence.
22142203
22152204
Raises
22162205
------
22172206
TypeError : PeriodDtype data is passed
22182207
"""
2219-
inferred_freq = None
2220-
2221-
data, copy = dtl.ensure_arraylike_for_datetimelike(
2222-
data, copy, cls_name="DatetimeArray"
2223-
)
2224-
2225-
if isinstance(data, DatetimeArray):
2226-
inferred_freq = data.freq
22272208

22282209
# By this point we are assured to have either a numpy array or Index
22292210
data, copy = maybe_convert_dtype(data, copy, tz=tz)
@@ -2236,6 +2217,7 @@ def _sequence_to_dt64(
22362217
if data_dtype == object or is_string_dtype(data_dtype):
22372218
# TODO: We do not have tests specific to string-dtypes,
22382219
# also complex or categorical or other extension
2220+
data = cast(np.ndarray, data)
22392221
copy = False
22402222
if lib.infer_dtype(data, skipna=False) == "integer":
22412223
data = data.astype(np.int64)
@@ -2248,7 +2230,7 @@ def _sequence_to_dt64(
22482230
yearfirst=yearfirst,
22492231
creso=abbrev_to_npy_unit(out_unit),
22502232
)
2251-
return result, tz, None
2233+
return result, tz
22522234
else:
22532235
converted, inferred_tz = objects_to_datetime64(
22542236
data,
@@ -2273,14 +2255,15 @@ def _sequence_to_dt64(
22732255
result, _ = _construct_from_dt64_naive(
22742256
converted, tz=tz, copy=copy, ambiguous=ambiguous
22752257
)
2276-
return result, tz, None
2258+
return result, tz
22772259

22782260
data_dtype = data.dtype
22792261

22802262
# `data` may have originally been a Categorical[datetime64[ns, tz]],
22812263
# so we need to handle these types.
22822264
if isinstance(data_dtype, DatetimeTZDtype):
22832265
# DatetimeArray -> ndarray
2266+
data = cast(DatetimeArray, data)
22842267
tz = _maybe_infer_tz(tz, data.tz)
22852268
result = data._ndarray
22862269

@@ -2289,6 +2272,7 @@ def _sequence_to_dt64(
22892272
if isinstance(data, DatetimeArray):
22902273
data = data._ndarray
22912274

2275+
data = cast(np.ndarray, data)
22922276
result, copy = _construct_from_dt64_naive(
22932277
data, tz=tz, copy=copy, ambiguous=ambiguous
22942278
)
@@ -2299,6 +2283,7 @@ def _sequence_to_dt64(
22992283
if data.dtype != INT64_DTYPE:
23002284
data = data.astype(np.int64, copy=False)
23012285
copy = False
2286+
data = cast(np.ndarray, data)
23022287
result = data.view(out_dtype)
23032288

23042289
if copy:
@@ -2308,7 +2293,7 @@ def _sequence_to_dt64(
23082293
assert result.dtype.kind == "M"
23092294
assert result.dtype != "M8"
23102295
assert is_supported_unit(get_unit_from_dtype(result.dtype))
2311-
return result, tz, inferred_freq
2296+
return result, tz
23122297

23132298

23142299
def _construct_from_dt64_naive(

pandas/core/arrays/timedeltas.py

+5-24
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
is_supported_unit,
2727
npy_unit_to_abbrev,
2828
periods_per_second,
29-
to_offset,
3029
)
3130
from pandas._libs.tslibs.conversion import precision_from_unit
3231
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
@@ -236,9 +235,7 @@ def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
236235
if dtype:
237236
dtype = _validate_td64_dtype(dtype)
238237

239-
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
240-
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
241-
freq = cast("Tick | None", freq)
238+
data, freq = sequence_to_td64ns(data, copy=copy, unit=None)
242239

243240
if dtype is not None:
244241
data = astype_overflowsafe(data, dtype=dtype, copy=False)
@@ -256,38 +253,22 @@ def _from_sequence_not_strict(
256253
unit=None,
257254
) -> Self:
258255
"""
259-
A non-strict version of _from_sequence, called from TimedeltaIndex.__new__.
256+
A non-strict version of _from_sequence, called from TimedeltaIndex.__new__.
257+
The result's `freq` is pinned afterwards via `_maybe_pin_freq`.
260258
"""
261259
if dtype:
262260
dtype = _validate_td64_dtype(dtype)
263261

264262
assert unit not in ["Y", "y", "M"] # caller is responsible for checking
265263

266-
explicit_none = freq is None
267-
freq = freq if freq is not lib.no_default else None
268-
269-
freq, freq_infer = dtl.maybe_infer_freq(freq)
270-
271264
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
272-
freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
273-
freq = cast("Tick | None", freq)
274-
if explicit_none:
275-
freq = None
276265

277266
if dtype is not None:
278267
data = astype_overflowsafe(data, dtype=dtype, copy=False)
279268

280-
result = cls._simple_new(data, dtype=data.dtype, freq=freq)
281-
282-
if inferred_freq is None and freq is not None:
283-
# this condition precludes `freq_infer`
284-
cls._validate_frequency(result, freq)
285-
286-
elif freq_infer:
287-
# Set _freq directly to bypass duplicative _validate_frequency
288-
# check.
289-
result._freq = to_offset(result.inferred_freq)
269+
result = cls._simple_new(data, dtype=data.dtype, freq=inferred_freq)
290270

271+
result._maybe_pin_freq(freq, {})
291272
return result
292273

293274
# Signature of "_generate_range" incompatible with supertype

0 commit comments

Comments
 (0)