Skip to content

ENH: linearly spaced date_range (GH 20808) #20846

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 3, 2018
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ Other Enhancements
library. (:issue:`20564`)
- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`)
- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`)
- :func:`date_range` now returns a linearly spaced ``DatetimeIndex`` if ``start``, ``end``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`)

.. _whatsnew_0230.api_breaking:

Expand Down
69 changes: 45 additions & 24 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@ def __new__(cls, data=None,
msg = 'periods must be a number, got {periods}'
raise TypeError(msg.format(periods=periods))

if data is None and freq is None:
if data is None and freq is None \
and com._any_none(periods, start, end):
raise ValueError("Must provide freq argument if no data is "
"supplied")

Expand Down Expand Up @@ -466,9 +467,9 @@ def __new__(cls, data=None,
@classmethod
def _generate(cls, start, end, periods, name, freq,
tz=None, normalize=False, ambiguous='raise', closed=None):
if com._count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and '
'periods, exactly two must be specified')
if com._count_not_none(start, end, periods, freq) != 3:
raise ValueError('Of the four parameters: start, end, periods, '
'and freq, exactly three must be specified')

_normalized = True

Expand Down Expand Up @@ -566,23 +567,30 @@ def _generate(cls, start, end, periods, name, freq,
if end.tz is None and start.tz is not None:
start = start.replace(tzinfo=None)

if _use_cached_range(freq, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
freq=freq, name=name)
if freq is not None:
if _use_cached_range(freq, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
freq=freq, name=name)
else:
index = _generate_regular_range(start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
index = conversion.tz_localize_to_utc(_ensure_int64(index),
tz,
ambiguous=ambiguous)
index = index.view(_NS_DTYPE)

# index is localized datetime64 array -> have to convert
# start/end as well to compare
if start is not None:
start = start.tz_localize(tz).asm8
if end is not None:
end = end.tz_localize(tz).asm8
else:
index = _generate_regular_range(start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
index = conversion.tz_localize_to_utc(_ensure_int64(index), tz,
ambiguous=ambiguous)
index = index.view(_NS_DTYPE)

# index is localized datetime64 array -> have to convert
# start/end as well to compare
if start is not None:
start = start.tz_localize(tz).asm8
if end is not None:
end = end.tz_localize(tz).asm8
index = tools.to_datetime(np.linspace(start.value,
end.value, periods))
if tz is not None:
index = index.tz_localize('UTC').tz_convert(tz)

if not left_closed and len(index) and index[0] == start:
index = index[1:]
Expand Down Expand Up @@ -2565,13 +2573,15 @@ def _generate_regular_range(start, end, periods, freq):
return data


def date_range(start=None, end=None, periods=None, freq='D', tz=None,
def date_range(start=None, end=None, periods=None, freq=None, tz=None,
normalize=False, name=None, closed=None, **kwargs):
"""
Return a fixed frequency DatetimeIndex.
Exactly two of the three parameters `start`, `end` and `periods`
must be specified.
Of the four parameters `start`, `end`, `periods`, and `freq`, exactly
three must be specified. If `freq` is omitted, the resulting DatetimeIndex
will have `periods` linearly spaced elements between `start` and `end`
(closed on both sides).
Parameters
----------
Expand Down Expand Up @@ -2613,7 +2623,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
--------
**Specifying the values**
The next three examples generate the same `DatetimeIndex`, but vary
The next four examples generate the same `DatetimeIndex`, but vary
the combination of `start`, `end` and `periods`.
Specify `start` and `end`, with the default daily frequency.
Expand All @@ -2637,6 +2647,13 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
'2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
dtype='datetime64[ns]', freq='D')
Specify `start`, `end`, and `periods`; the frequency is generated
automatically (linearly spaced).
>>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)
DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
'2018-04-27 00:00:00'], freq=None)
**Other Parameters**
Changed the `freq` (frequency) to ``'M'`` (month end frequency).
Expand Down Expand Up @@ -2687,6 +2704,10 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
dtype='datetime64[ns]', freq='D')
"""

if freq is None and com._any_none(periods, start, end):
freq = 'D'

return DatetimeIndex(start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)
Expand Down
25 changes: 21 additions & 4 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,28 @@ def test_date_range_ambiguous_arguments(self):
start = datetime(2011, 1, 1, 5, 3, 40)
end = datetime(2011, 1, 1, 8, 9, 40)

msg = ('Of the three parameters: start, end, and periods, '
'exactly two must be specified')
msg = ('Of the four parameters: start, end, periods, and '
'freq, exactly three must be specified')
with tm.assert_raises_regex(ValueError, msg):
date_range(start, end, periods=10, freq='s')

def test_date_range_convenience_periods(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also test this with the tz arg specified? It would also be good to test a tz where there is a daylight saving time transition between start and end.

# GH 20808
rng = date_range('2018-04-24', '2018-04-27', periods=3)
exp = DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
'2018-04-27 00:00:00'], freq=None)

tm.assert_index_equal(rng, exp)

# Test if spacing remains linear if tz changes to dst in range
rng = date_range('2018-04-01 01:00:00', '2018-04-01 04:00:00',
tz='Australia/Sydney', periods=3)
exp = DatetimeIndex(['2018-04-01 01:00:00+11:00',
'2018-04-01 02:00:00+11:00',
'2018-04-01 02:00:00+10:00',
'2018-04-01 03:00:00+10:00',
'2018-04-01 04:00:00+10:00'], freq=None)

def test_date_range_businesshour(self):
idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00',
'2014-07-04 11:00',
Expand Down Expand Up @@ -198,8 +215,8 @@ def test_date_range_businesshour(self):

def test_range_misspecified(self):
# GH #1095
msg = ('Of the three parameters: start, end, and periods, '
'exactly two must be specified')
msg = ('Of the four parameters: start, end, periods, and '
'freq, exactly three must be specified')

with tm.assert_raises_regex(ValueError, msg):
date_range(start='1/1/2000')
Expand Down