Skip to content

Commit 28dbae9

Browse files
onnoeberhardjreback
authored andcommitted
ENH: linearly spaced date_range (GH 20808) (pandas-dev#20846)
1 parent cb5c869 commit 28dbae9

File tree

3 files changed

+67
-28
lines changed

3 files changed

+67
-28
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,7 @@ Other Enhancements
523523
library. (:issue:`20564`)
524524
- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`)
525525
- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`)
526+
- :func:`date_range` now returns a linearly spaced ``DatetimeIndex`` if ``start``, ``end``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`)
526527

527528
.. _whatsnew_0230.api_breaking:
528529

pandas/core/indexes/datetimes.py

+45-24
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,8 @@ def __new__(cls, data=None,
358358
msg = 'periods must be a number, got {periods}'
359359
raise TypeError(msg.format(periods=periods))
360360

361-
if data is None and freq is None:
361+
if data is None and freq is None \
362+
and com._any_none(periods, start, end):
362363
raise ValueError("Must provide freq argument if no data is "
363364
"supplied")
364365

@@ -466,9 +467,9 @@ def __new__(cls, data=None,
466467
@classmethod
467468
def _generate(cls, start, end, periods, name, freq,
468469
tz=None, normalize=False, ambiguous='raise', closed=None):
469-
if com._count_not_none(start, end, periods) != 2:
470-
raise ValueError('Of the three parameters: start, end, and '
471-
'periods, exactly two must be specified')
470+
if com._count_not_none(start, end, periods, freq) != 3:
471+
raise ValueError('Of the four parameters: start, end, periods, '
472+
'and freq, exactly three must be specified')
472473

473474
_normalized = True
474475

@@ -566,23 +567,30 @@ def _generate(cls, start, end, periods, name, freq,
566567
if end.tz is None and start.tz is not None:
567568
start = start.replace(tzinfo=None)
568569

569-
if _use_cached_range(freq, _normalized, start, end):
570-
index = cls._cached_range(start, end, periods=periods,
571-
freq=freq, name=name)
570+
if freq is not None:
571+
if _use_cached_range(freq, _normalized, start, end):
572+
index = cls._cached_range(start, end, periods=periods,
573+
freq=freq, name=name)
574+
else:
575+
index = _generate_regular_range(start, end, periods, freq)
576+
577+
if tz is not None and getattr(index, 'tz', None) is None:
578+
index = conversion.tz_localize_to_utc(_ensure_int64(index),
579+
tz,
580+
ambiguous=ambiguous)
581+
index = index.view(_NS_DTYPE)
582+
583+
# index is localized datetime64 array -> have to convert
584+
# start/end as well to compare
585+
if start is not None:
586+
start = start.tz_localize(tz).asm8
587+
if end is not None:
588+
end = end.tz_localize(tz).asm8
572589
else:
573-
index = _generate_regular_range(start, end, periods, freq)
574-
575-
if tz is not None and getattr(index, 'tz', None) is None:
576-
index = conversion.tz_localize_to_utc(_ensure_int64(index), tz,
577-
ambiguous=ambiguous)
578-
index = index.view(_NS_DTYPE)
579-
580-
# index is localized datetime64 array -> have to convert
581-
# start/end as well to compare
582-
if start is not None:
583-
start = start.tz_localize(tz).asm8
584-
if end is not None:
585-
end = end.tz_localize(tz).asm8
590+
index = tools.to_datetime(np.linspace(start.value,
591+
end.value, periods))
592+
if tz is not None:
593+
index = index.tz_localize('UTC').tz_convert(tz)
586594

587595
if not left_closed and len(index) and index[0] == start:
588596
index = index[1:]
@@ -2565,13 +2573,15 @@ def _generate_regular_range(start, end, periods, freq):
25652573
return data
25662574

25672575

2568-
def date_range(start=None, end=None, periods=None, freq='D', tz=None,
2576+
def date_range(start=None, end=None, periods=None, freq=None, tz=None,
25692577
normalize=False, name=None, closed=None, **kwargs):
25702578
"""
25712579
Return a fixed frequency DatetimeIndex.
25722580
2573-
Exactly two of the three parameters `start`, `end` and `periods`
2574-
must be specified.
2581+
Of the four parameters `start`, `end`, `periods`, and `freq` exactly
2582+
three must be specified. If `freq` is omitted, the resulting DatetimeIndex
2583+
will have `periods` linearly spaced elements between `start` and `end`
2584+
(closed on both sides).
25752585
25762586
Parameters
25772587
----------
@@ -2613,7 +2623,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
26132623
--------
26142624
**Specifying the values**
26152625
2616-
The next three examples generate the same `DatetimeIndex`, but vary
2626+
The next four examples generate the same `DatetimeIndex`, but vary
26172627
the combination of `start`, `end` and `periods`.
26182628
26192629
Specify `start` and `end`, with the default daily frequency.
@@ -2637,6 +2647,13 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
26372647
'2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
26382648
dtype='datetime64[ns]', freq='D')
26392649
2650+
Specify `start`, `end`, and `periods`; the frequency is generated
2651+
automatically (linearly spaced).
2652+
2653+
>>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)
2654+
DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
2655+
'2018-04-27 00:00:00'], dtype='datetime64[ns]', freq=None)
2656+
26402657
**Other Parameters**
26412658
26422659
Changed the `freq` (frequency) to ``'M'`` (month end frequency).
@@ -2687,6 +2704,10 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
26872704
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
26882705
dtype='datetime64[ns]', freq='D')
26892706
"""
2707+
2708+
if freq is None and com._any_none(periods, start, end):
2709+
freq = 'D'
2710+
26902711
return DatetimeIndex(start=start, end=end, periods=periods,
26912712
freq=freq, tz=tz, normalize=normalize, name=name,
26922713
closed=closed, **kwargs)

pandas/tests/indexes/datetimes/test_date_range.py

+21-4
Original file line numberDiff line numberDiff line change
@@ -157,11 +157,28 @@ def test_date_range_ambiguous_arguments(self):
157157
start = datetime(2011, 1, 1, 5, 3, 40)
158158
end = datetime(2011, 1, 1, 8, 9, 40)
159159

160-
msg = ('Of the three parameters: start, end, and periods, '
161-
'exactly two must be specified')
160+
msg = ('Of the four parameters: start, end, periods, and '
161+
'freq, exactly three must be specified')
162162
with tm.assert_raises_regex(ValueError, msg):
163163
date_range(start, end, periods=10, freq='s')
164164

165+
def test_date_range_convenience_periods(self):
166+
# GH 20808
167+
rng = date_range('2018-04-24', '2018-04-27', periods=3)
168+
exp = DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
169+
'2018-04-27 00:00:00'], freq=None)
170+
171+
tm.assert_index_equal(rng, exp)
172+
173+
# Test if spacing remains linear if tz changes to dst in range
174+
rng = date_range('2018-04-01 01:00:00', '2018-04-01 04:00:00',
175+
tz='Australia/Sydney', periods=3)
176+
exp = DatetimeIndex(['2018-04-01 01:00:00+11:00',
177+
'2018-04-01 02:00:00+11:00',
178+
'2018-04-01 02:00:00+10:00',
179+
'2018-04-01 03:00:00+10:00',
180+
'2018-04-01 04:00:00+10:00'], freq=None)
181+
165182
def test_date_range_businesshour(self):
166183
idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
167184
'2014-07-04 11:00',
@@ -198,8 +215,8 @@ def test_date_range_businesshour(self):
198215

199216
def test_range_misspecified(self):
200217
# GH #1095
201-
msg = ('Of the three parameters: start, end, and periods, '
202-
'exactly two must be specified')
218+
msg = ('Of the four parameters: start, end, periods, and '
219+
'freq, exactly three must be specified')
203220

204221
with tm.assert_raises_regex(ValueError, msg):
205222
date_range(start='1/1/2000')

0 commit comments

Comments
 (0)