Skip to content

Commit 648ca95

Browse files
jschendel authored and jreback committed
ENH: Implement linspace behavior for timedelta_range/interval_range (pandas-dev#21009)
1 parent 0e00151 commit 648ca95

File tree

7 files changed

+257
-200
lines changed

7 files changed

+257
-200
lines changed

doc/source/whatsnew/v0.23.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ Other Enhancements
526526
- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`)
527527
- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`)
528528
- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`)
529-
- :func:`date_range` now returns a linearly spaced ``DatetimeIndex`` if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`)
529+
- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``end``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`)
530530

531531
.. _whatsnew_0230.api_breaking:
532532

pandas/core/indexes/datetimes.py

+25-15
Original file line numberDiff line numberDiff line change
@@ -358,11 +358,6 @@ def __new__(cls, data=None,
358358
msg = 'periods must be a number, got {periods}'
359359
raise TypeError(msg.format(periods=periods))
360360

361-
if data is None and freq is None \
362-
and com._any_none(periods, start, end):
363-
raise ValueError("Must provide freq argument if no data is "
364-
"supplied")
365-
366361
# if dtype has an embedded tz, capture it
367362
if dtype is not None:
368363
try:
@@ -377,9 +372,13 @@ def __new__(cls, data=None,
377372
pass
378373

379374
if data is None:
380-
return cls._generate(start, end, periods, name, freq,
381-
tz=tz, normalize=normalize, closed=closed,
382-
ambiguous=ambiguous)
375+
if freq is None and com._any_none(periods, start, end):
376+
msg = 'Must provide freq argument if no data is supplied'
377+
raise ValueError(msg)
378+
else:
379+
return cls._generate(start, end, periods, name, freq, tz=tz,
380+
normalize=normalize, closed=closed,
381+
ambiguous=ambiguous)
383382

384383
if not isinstance(data, (np.ndarray, Index, ABCSeries)):
385384
if is_scalar(data):
@@ -2590,11 +2589,6 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
25902589
"""
25912590
Return a fixed frequency DatetimeIndex.
25922591
2593-
Of the three parameters `start`, `end`, `periods`, and `freq` exactly
2594-
three must be specified. If `freq` is omitted, the resulting DatetimeIndex
2595-
will have `periods` linearly spaced elements between `start` and `end`
2596-
(closed on both sides).
2597-
25982592
Parameters
25992593
----------
26002594
start : str or datetime-like, optional
@@ -2628,9 +2622,20 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
26282622
See Also
26292623
--------
26302624
pandas.DatetimeIndex : An immutable container for datetimes.
2625+
pandas.timedelta_range : Return a fixed frequency TimedeltaIndex.
26312626
pandas.period_range : Return a fixed frequency PeriodIndex.
26322627
pandas.interval_range : Return a fixed frequency IntervalIndex.
26332628
2629+
Notes
2630+
-----
2631+
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
2632+
exactly three must be specified. If ``freq`` is omitted, the resulting
2633+
``DatetimeIndex`` will have ``periods`` linearly spaced elements between
2634+
``start`` and ``end`` (closed on both sides).
2635+
2636+
To learn more about the frequency strings, please see `this link
2637+
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
2638+
26342639
Examples
26352640
--------
26362641
**Specifying the values**
@@ -2769,8 +2774,10 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
27692774
27702775
Notes
27712776
-----
2772-
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
2773-
must be specified.
2777+
Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``,
2778+
exactly three must be specified. Specifying ``freq`` is a requirement
2779+
for ``bdate_range``. Use ``date_range`` if specifying ``freq`` is not
2780+
desired.
27742781
27752782
To learn more about the frequency strings, please see `this link
27762783
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
@@ -2779,6 +2786,9 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
27792786
-------
27802787
rng : DatetimeIndex
27812788
"""
2789+
if freq is None:
2790+
msg = 'freq must be specified for bdate_range; use date_range instead'
2791+
raise TypeError(msg)
27822792

27832793
if is_string_like(freq) and freq.startswith('C'):
27842794
try:

pandas/core/indexes/interval.py

+53-32
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
from pandas.core.dtypes.missing import notna, isna
77
from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex
88
from pandas.core.dtypes.dtypes import IntervalDtype
9-
from pandas.core.dtypes.cast import maybe_convert_platform, find_common_type
9+
from pandas.core.dtypes.cast import (
10+
maybe_convert_platform, find_common_type, maybe_downcast_to_dtype)
1011
from pandas.core.dtypes.common import (
1112
_ensure_platform_int,
1213
is_list_like,
@@ -1465,8 +1466,13 @@ def interval_range(start=None, end=None, periods=None, freq=None,
14651466
14661467
Notes
14671468
-----
1468-
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
1469-
must be specified.
1469+
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
1470+
exactly three must be specified. If ``freq`` is omitted, the resulting
1471+
``IntervalIndex`` will have ``periods`` linearly spaced elements between
1472+
``start`` and ``end``, inclusively.
1473+
1474+
To learn more about datetime-like frequency strings, please see `this link
1475+
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
14701476
14711477
Returns
14721478
-------
@@ -1505,6 +1511,14 @@ def interval_range(start=None, end=None, periods=None, freq=None,
15051511
(2017-03-01, 2017-04-01]]
15061512
closed='right', dtype='interval[datetime64[ns]]')
15071513
1514+
Specify ``start``, ``end``, and ``periods``; the frequency is generated
1515+
automatically (linearly spaced).
1516+
1517+
>>> pd.interval_range(start=0, end=6, periods=4)
1518+
IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
1519+
closed='right',
1520+
dtype='interval[float64]')
1521+
15081522
The ``closed`` parameter specifies which endpoints of the individual
15091523
intervals within the ``IntervalIndex`` are closed.
15101524
@@ -1516,19 +1530,21 @@ def interval_range(start=None, end=None, periods=None, freq=None,
15161530
--------
15171531
IntervalIndex : an Index of intervals that are all closed on the same side.
15181532
"""
1519-
if com._count_not_none(start, end, periods) != 2:
1520-
raise ValueError('Of the three parameters: start, end, and periods, '
1521-
'exactly two must be specified')
1522-
15231533
start = com._maybe_box_datetimelike(start)
15241534
end = com._maybe_box_datetimelike(end)
1525-
endpoint = next(com._not_none(start, end))
1535+
endpoint = start if start is not None else end
1536+
1537+
if freq is None and com._any_none(periods, start, end):
1538+
freq = 1 if is_number(endpoint) else 'D'
1539+
1540+
if com._count_not_none(start, end, periods, freq) != 3:
1541+
raise ValueError('Of the four parameters: start, end, periods, and '
1542+
'freq, exactly three must be specified')
15261543

15271544
if not _is_valid_endpoint(start):
15281545
msg = 'start must be numeric or datetime-like, got {start}'
15291546
raise ValueError(msg.format(start=start))
1530-
1531-
if not _is_valid_endpoint(end):
1547+
elif not _is_valid_endpoint(end):
15321548
msg = 'end must be numeric or datetime-like, got {end}'
15331549
raise ValueError(msg.format(end=end))
15341550

@@ -1538,8 +1554,7 @@ def interval_range(start=None, end=None, periods=None, freq=None,
15381554
msg = 'periods must be a number, got {periods}'
15391555
raise TypeError(msg.format(periods=periods))
15401556

1541-
freq = freq or (1 if is_number(endpoint) else 'D')
1542-
if not is_number(freq):
1557+
if freq is not None and not is_number(freq):
15431558
try:
15441559
freq = to_offset(freq)
15451560
except ValueError:
@@ -1552,28 +1567,34 @@ def interval_range(start=None, end=None, periods=None, freq=None,
15521567
_is_type_compatible(end, freq)]):
15531568
raise TypeError("start, end, freq need to be type compatible")
15541569

1570+
# +1 to convert interval count to breaks count (n breaks = n-1 intervals)
1571+
if periods is not None:
1572+
periods += 1
1573+
15551574
if is_number(endpoint):
1575+
# compute the period/start/end if unspecified (at most one)
15561576
if periods is None:
1557-
periods = int((end - start) // freq)
1558-
1559-
if start is None:
1560-
start = end - periods * freq
1561-
1562-
# force end to be consistent with freq (lower if freq skips over end)
1563-
end = start + periods * freq
1564-
1565-
# end + freq for inclusive endpoint
1566-
breaks = np.arange(start, end + freq, freq)
1567-
elif isinstance(endpoint, Timestamp):
1568-
# add one to account for interval endpoints (n breaks = n-1 intervals)
1569-
if periods is not None:
1570-
periods += 1
1571-
breaks = date_range(start=start, end=end, periods=periods, freq=freq)
1577+
periods = int((end - start) // freq) + 1
1578+
elif start is None:
1579+
start = end - (periods - 1) * freq
1580+
elif end is None:
1581+
end = start + (periods - 1) * freq
1582+
1583+
# force end to be consistent with freq (lower if freq skips end)
1584+
if freq is not None:
1585+
end -= end % freq
1586+
1587+
breaks = np.linspace(start, end, periods)
1588+
if all(is_integer(x) for x in com._not_none(start, end, freq)):
1589+
# np.linspace always produces float output
1590+
breaks = maybe_downcast_to_dtype(breaks, 'int64')
15721591
else:
1573-
# add one to account for interval endpoints (n breaks = n-1 intervals)
1574-
if periods is not None:
1575-
periods += 1
1576-
breaks = timedelta_range(start=start, end=end, periods=periods,
1577-
freq=freq)
1592+
# delegate to the appropriate range function
1593+
if isinstance(endpoint, Timestamp):
1594+
range_func = date_range
1595+
else:
1596+
range_func = timedelta_range
1597+
1598+
breaks = range_func(start=start, end=end, periods=periods, freq=freq)
15781599

15791600
return IntervalIndex.from_breaks(breaks, name=name, closed=closed)

pandas/core/indexes/timedeltas.py

+31-15
Original file line numberDiff line numberDiff line change
@@ -225,13 +225,13 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
225225
msg = 'periods must be a number, got {periods}'
226226
raise TypeError(msg.format(periods=periods))
227227

228-
if data is None and freq is None:
229-
raise ValueError("Must provide freq argument if no data is "
230-
"supplied")
231-
232228
if data is None:
233-
return cls._generate(start, end, periods, name, freq,
234-
closed=closed)
229+
if freq is None and com._any_none(periods, start, end):
230+
msg = 'Must provide freq argument if no data is supplied'
231+
raise ValueError(msg)
232+
else:
233+
return cls._generate(start, end, periods, name, freq,
234+
closed=closed)
235235

236236
if unit is not None:
237237
data = to_timedelta(data, unit=unit, box=False)
@@ -266,10 +266,10 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
266266
return cls._simple_new(data, name=name, freq=freq)
267267

268268
@classmethod
269-
def _generate(cls, start, end, periods, name, offset, closed=None):
270-
if com._count_not_none(start, end, periods) != 2:
271-
raise ValueError('Of the three parameters: start, end, and '
272-
'periods, exactly two must be specified')
269+
def _generate(cls, start, end, periods, name, freq, closed=None):
270+
if com._count_not_none(start, end, periods, freq) != 3:
271+
raise ValueError('Of the four parameters: start, end, periods, '
272+
'and freq, exactly three must be specified')
273273

274274
if start is not None:
275275
start = Timedelta(start)
@@ -295,8 +295,11 @@ def _generate(cls, start, end, periods, name, offset, closed=None):
295295
else:
296296
raise ValueError("Closed has to be either 'left', 'right' or None")
297297

298-
index = _generate_regular_range(start, end, periods, offset)
299-
index = cls._simple_new(index, name=name, freq=offset)
298+
if freq is not None:
299+
index = _generate_regular_range(start, end, periods, freq)
300+
index = cls._simple_new(index, name=name, freq=freq)
301+
else:
302+
index = to_timedelta(np.linspace(start.value, end.value, periods))
300303

301304
if not left_closed:
302305
index = index[1:]
@@ -1046,7 +1049,7 @@ def _generate_regular_range(start, end, periods, offset):
10461049
return data
10471050

10481051

1049-
def timedelta_range(start=None, end=None, periods=None, freq='D',
1052+
def timedelta_range(start=None, end=None, periods=None, freq=None,
10501053
name=None, closed=None):
10511054
"""
10521055
Return a fixed frequency TimedeltaIndex, with day as the default
@@ -1074,8 +1077,10 @@ def timedelta_range(start=None, end=None, periods=None, freq='D',
10741077
10751078
Notes
10761079
-----
1077-
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
1078-
must be specified.
1080+
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
1081+
exactly three must be specified. If ``freq`` is omitted, the resulting
1082+
``TimedeltaIndex`` will have ``periods`` linearly spaced elements between
1083+
``start`` and ``end`` (closed on both sides).
10791084
10801085
To learn more about the frequency strings, please see `this link
10811086
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
@@ -1102,6 +1107,17 @@ def timedelta_range(start=None, end=None, periods=None, freq='D',
11021107
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
11031108
'1 days 18:00:00', '2 days 00:00:00'],
11041109
dtype='timedelta64[ns]', freq='6H')
1110+
1111+
Specify ``start``, ``end``, and ``periods``; the frequency is generated
1112+
automatically (linearly spaced).
1113+
1114+
>>> pd.timedelta_range(start='1 day', end='5 days', periods=4)
1115+
TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
1116+
'5 days 00:00:00'],
1117+
dtype='timedelta64[ns]', freq=None)
11051118
"""
1119+
if freq is None and com._any_none(periods, start, end):
1120+
freq = 'D'
1121+
11061122
return TimedeltaIndex(start=start, end=end, periods=periods,
11071123
freq=freq, name=name, closed=closed)

pandas/tests/indexes/datetimes/test_date_range.py

+4
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,10 @@ def test_constructor(self):
361361
with tm.assert_raises_regex(TypeError, msg):
362362
bdate_range('2011-1-1', '2012-1-1', 'B')
363363

364+
msg = 'freq must be specified for bdate_range; use date_range instead'
365+
with tm.assert_raises_regex(TypeError, msg):
366+
bdate_range(START, END, periods=10, freq=None)
367+
364368
def test_naive_aware_conflicts(self):
365369
naive = bdate_range(START, END, freq=BDay(), tz=None)
366370
aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong")

0 commit comments

Comments
 (0)