Skip to content

Commit 5ef20ae

Browse files
committed
CLN: refactor core/arrays/_range to support timedeltas
1 parent e35188e commit 5ef20ae

File tree

3 files changed

+70
-70
lines changed

3 files changed

+70
-70
lines changed

pandas/core/arrays/_ranges.py

+66-44
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,12 @@
77

88
import numpy as np
99

10-
from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp
10+
from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp
1111

1212
from pandas.tseries.offsets import DateOffset, Tick, generate_range
1313

1414

15-
def generate_regular_range(
15+
def generate_timestamps_range(
1616
start: Timestamp, end: Timestamp, periods: int, freq: DateOffset
1717
) -> Tuple[np.ndarray, str]:
1818
"""
@@ -32,57 +32,79 @@ def generate_regular_range(
3232
3333
Returns
3434
-------
35-
ndarray[np.int64] representing nanosecond unix timestamps
35+
(tuple): containing:
36+
37+
values : ndarray[np.int64] representing nanosecond unix timestamps
38+
tz : the timezone of the range
3639
"""
3740
if isinstance(freq, Tick):
38-
stride = freq.nanos
39-
if periods is None:
40-
b = Timestamp(start).value
41-
# cannot just use e = Timestamp(end) + 1 because arange breaks when
42-
# stride is too large, see GH10887
43-
e = b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1
44-
# end.tz == start.tz by this point due to _generate implementation
45-
tz = start.tz
46-
elif start is not None:
47-
b = Timestamp(start).value
48-
e = _generate_range_overflow_safe(b, periods, stride, side="start")
49-
tz = start.tz
50-
elif end is not None:
51-
e = Timestamp(end).value + stride
52-
b = _generate_range_overflow_safe(e, periods, stride, side="end")
53-
tz = end.tz
54-
else:
55-
raise ValueError(
56-
"at least 'start' or 'end' should be specified "
57-
"if a 'period' is given."
58-
)
59-
60-
with np.errstate(over="raise"):
61-
# If the range is sufficiently large, np.arange may overflow
62-
# and incorrectly return an empty array if not caught.
63-
try:
64-
values = np.arange(b, e, stride, dtype=np.int64)
65-
except FloatingPointError:
66-
xdr = [b]
67-
while xdr[-1] != e:
68-
xdr.append(xdr[-1] + stride)
69-
values = np.array(xdr[:-1], dtype=np.int64)
70-
41+
start_value = Timestamp(start).value if start is not None else None
42+
end_value = Timestamp(end).value if end is not None else None
43+
values = _generate_regular_range(start_value, end_value, periods, freq.nanos)
7144
else:
72-
tz = None
73-
# start and end should have the same timezone by this point
74-
if start is not None:
75-
tz = start.tz
76-
elif end is not None:
77-
tz = end.tz
78-
7945
xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
80-
8146
values = np.array([x.value for x in xdr], dtype=np.int64)
8247

48+
tz = start.tz if start is not None else end.tz
8349
return values, tz
8450

8551

52+
def generate_timedeltas_range(
53+
start: Timedelta, end: Timedelta, periods: int, freq: DateOffset
54+
):
55+
"""
56+
Generate a range of dates with the spans between dates described by
57+
the given `freq` DateOffset.
58+
59+
Parameters
60+
----------
61+
start : Timedelta or None
62+
first point of produced date range
63+
end : Timedelta or None
64+
last point of produced date range
65+
periods : int
66+
number of periods in produced date range
67+
freq : DateOffset
68+
describes space between dates in produced date range
69+
70+
Returns
71+
-------
72+
ndarray[np.int64] representing nanosecond timedeltas
73+
"""
74+
start_value = Timedelta(start).value if start is not None else None
75+
end_value = Timedelta(end).value if end is not None else None
76+
return _generate_regular_range(start_value, end_value, periods, freq.nanos)
77+
78+
79+
def _generate_regular_range(start: int, end: int, periods: int, stride: int):
80+
b = start
81+
if periods is None:
82+
# cannot just use e = Timestamp(end) + 1 because arange breaks when
83+
# stride is too large, see GH10887
84+
e = b + (end - b) // stride * stride + stride // 2 + 1
85+
elif start is not None:
86+
e = _generate_range_overflow_safe(b, periods, stride, side="start")
87+
elif end is not None:
88+
e = end + stride
89+
b = _generate_range_overflow_safe(e, periods, stride, side="end")
90+
else:
91+
raise ValueError(
92+
"at least 'start' or 'end' should be specified if a 'period' is given."
93+
)
94+
95+
with np.errstate(over="raise"):
96+
# If the range is sufficiently large, np.arange may overflow
97+
# and incorrectly return an empty array if not caught.
98+
try:
99+
values = np.arange(b, e, stride, dtype=np.int64)
100+
except FloatingPointError:
101+
xdr = [b]
102+
while xdr[-1] != e:
103+
xdr.append(xdr[-1] + stride)
104+
values = np.array(xdr[:-1], dtype=np.int64)
105+
return values
106+
107+
86108
def _generate_range_overflow_safe(
87109
endpoint: int, periods: int, stride: int, side: str = "start"
88110
) -> int:

pandas/core/arrays/datetimes.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444

4545
from pandas.core.algorithms import checked_add_with_arr
4646
from pandas.core.arrays import datetimelike as dtl
47-
from pandas.core.arrays._ranges import generate_regular_range
47+
from pandas.core.arrays._ranges import generate_timestamps_range
4848
import pandas.core.common as com
4949

5050
from pandas.tseries.frequencies import get_period_alias, to_offset
@@ -404,7 +404,7 @@ def _generate_range(
404404
if end is not None:
405405
end = end.tz_localize(None)
406406
# TODO: consider re-implementing _cached_range; GH#17914
407-
values, _tz = generate_regular_range(start, end, periods, freq)
407+
values, _tz = generate_timestamps_range(start, end, periods, freq)
408408
index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz))
409409

410410
if tz is not None and index.tz is None:

pandas/core/arrays/timedeltas.py

+2-24
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
from pandas.core import nanops
3939
from pandas.core.algorithms import checked_add_with_arr
4040
from pandas.core.arrays import datetimelike as dtl
41-
from pandas.core.arrays._ranges import _generate_range_overflow_safe
41+
from pandas.core.arrays._ranges import generate_timedeltas_range
4242
import pandas.core.common as com
4343
from pandas.core.construction import extract_array
4444

@@ -256,7 +256,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
256256
left_closed, right_closed = dtl.validate_endpoints(closed)
257257

258258
if freq is not None:
259-
index = _generate_regular_range(start, end, periods, freq)
259+
index = generate_timedeltas_range(start, end, periods, freq)
260260
else:
261261
index = np.linspace(start.value, end.value, periods).astype("i8")
262262
if len(index) >= 2:
@@ -1056,25 +1056,3 @@ def _validate_td64_dtype(dtype):
10561056
raise ValueError(f"dtype {dtype} cannot be converted to timedelta64[ns]")
10571057

10581058
return dtype
1059-
1060-
1061-
def _generate_regular_range(start, end, periods, offset):
1062-
stride = offset.nanos
1063-
if periods is None:
1064-
b = Timedelta(start).value
1065-
# cannot just use e = Timestamp(end) + 1 because arange breaks when
1066-
# stride is too large, see GH 10887 & GH 30353
1067-
e = b + (Timedelta(end).value - b) // stride * stride + stride // 2 + 1
1068-
elif start is not None:
1069-
b = Timedelta(start).value
1070-
e = _generate_range_overflow_safe(b, periods, stride, side="start")
1071-
elif end is not None:
1072-
e = Timedelta(end).value + stride
1073-
b = _generate_range_overflow_safe(e, periods, stride, side="end")
1074-
else:
1075-
raise ValueError(
1076-
"at least 'start' or 'end' should be specified if a 'period' is given."
1077-
)
1078-
1079-
data = np.arange(b, e, stride, dtype=np.int64)
1080-
return data

0 commit comments

Comments
 (0)