Skip to content

ENH: Implement linspace behavior for timedelta_range and interval_range #21009

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ Other Enhancements
- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`)
- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`)
- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`)
- :func:`date_range` now returns a linearly spaced ``DatetimeIndex`` if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`)
- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``end``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`)

.. _whatsnew_0230.api_breaking:

Expand Down
40 changes: 25 additions & 15 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,11 +358,6 @@ def __new__(cls, data=None,
msg = 'periods must be a number, got {periods}'
raise TypeError(msg.format(periods=periods))

if data is None and freq is None \
and com._any_none(periods, start, end):
raise ValueError("Must provide freq argument if no data is "
"supplied")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small refactor here: moved this check a few lines down so that all of the `if data is None:` code is in one place.


# if dtype has an embedded tz, capture it
if dtype is not None:
try:
Expand All @@ -377,9 +372,13 @@ def __new__(cls, data=None,
pass

if data is None:
return cls._generate(start, end, periods, name, freq,
tz=tz, normalize=normalize, closed=closed,
ambiguous=ambiguous)
if freq is None and com._any_none(periods, start, end):
msg = 'Must provide freq argument if no data is supplied'
raise ValueError(msg)
else:
return cls._generate(start, end, periods, name, freq, tz=tz,
normalize=normalize, closed=closed,
ambiguous=ambiguous)

if not isinstance(data, (np.ndarray, Index, ABCSeries)):
if is_scalar(data):
Expand Down Expand Up @@ -2590,11 +2589,6 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
"""
Return a fixed frequency DatetimeIndex.

Of the three parameters `start`, `end`, `periods`, and `freq` exactly
three must be specified. If `freq` is omitted, the resulting DatetimeIndex
will have `periods` linearly spaced elements between `start` and `end`
(closed on both sides).

Parameters
----------
start : str or datetime-like, optional
Expand Down Expand Up @@ -2628,9 +2622,20 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
See Also
--------
pandas.DatetimeIndex : An immutable container for datetimes.
pandas.timedelta_range : Return a fixed frequency TimedeltaIndex.
pandas.period_range : Return a fixed frequency PeriodIndex.
pandas.interval_range : Return a fixed frequency IntervalIndex.

Notes
-----
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. If ``freq`` is omitted, the resulting
``DatetimeIndex`` will have ``periods`` linearly spaced elements between
``start`` and ``end`` (closed on both sides).

To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

Examples
--------
**Specifying the values**
Expand Down Expand Up @@ -2769,8 +2774,10 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,

Notes
-----
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
must be specified.
Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. Specifying ``freq`` is a requirement
for ``bdate_range``. Use ``date_range`` if specifying ``freq`` is not
desired.

To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
Expand All @@ -2779,6 +2786,9 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
-------
rng : DatetimeIndex
"""
if freq is None:
msg = 'freq must be specified for bdate_range; use date_range instead'
raise TypeError(msg)

if is_string_like(freq) and freq.startswith('C'):
try:
Expand Down
85 changes: 53 additions & 32 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from pandas.core.dtypes.missing import notna, isna
from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex
from pandas.core.dtypes.dtypes import IntervalDtype
from pandas.core.dtypes.cast import maybe_convert_platform, find_common_type
from pandas.core.dtypes.cast import (
maybe_convert_platform, find_common_type, maybe_downcast_to_dtype)
from pandas.core.dtypes.common import (
_ensure_platform_int,
is_list_like,
Expand Down Expand Up @@ -1465,8 +1466,13 @@ def interval_range(start=None, end=None, periods=None, freq=None,

Notes
-----
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
must be specified.
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. If ``freq`` is omitted, the resulting
``IntervalIndex`` will have ``periods`` linearly spaced elements between
``start`` and ``end``, inclusively.

To learn more about datetime-like frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

Returns
-------
Expand Down Expand Up @@ -1505,6 +1511,14 @@ def interval_range(start=None, end=None, periods=None, freq=None,
(2017-03-01, 2017-04-01]]
closed='right', dtype='interval[datetime64[ns]]')

Specify ``start``, ``end``, and ``periods``; the frequency is generated
automatically (linearly spaced).

>>> pd.interval_range(start=0, end=6, periods=4)
IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
closed='right',
dtype='interval[float64]')

The ``closed`` parameter specifies which endpoints of the individual
intervals within the ``IntervalIndex`` are closed.

Expand All @@ -1516,19 +1530,21 @@ def interval_range(start=None, end=None, periods=None, freq=None,
--------
IntervalIndex : an Index of intervals that are all closed on the same side.
"""
if com._count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and periods, '
'exactly two must be specified')

start = com._maybe_box_datetimelike(start)
end = com._maybe_box_datetimelike(end)
endpoint = next(com._not_none(start, end))
endpoint = start if start is not None else end

if freq is None and com._any_none(periods, start, end):
freq = 1 if is_number(endpoint) else 'D'

if com._count_not_none(start, end, periods, freq) != 3:
raise ValueError('Of the four parameters: start, end, periods, and '
'freq, exactly three must be specified')

if not _is_valid_endpoint(start):
msg = 'start must be numeric or datetime-like, got {start}'
raise ValueError(msg.format(start=start))

if not _is_valid_endpoint(end):
elif not _is_valid_endpoint(end):
msg = 'end must be numeric or datetime-like, got {end}'
raise ValueError(msg.format(end=end))

Expand All @@ -1538,8 +1554,7 @@ def interval_range(start=None, end=None, periods=None, freq=None,
msg = 'periods must be a number, got {periods}'
raise TypeError(msg.format(periods=periods))

freq = freq or (1 if is_number(endpoint) else 'D')
if not is_number(freq):
if freq is not None and not is_number(freq):
try:
freq = to_offset(freq)
except ValueError:
Expand All @@ -1552,28 +1567,34 @@ def interval_range(start=None, end=None, periods=None, freq=None,
_is_type_compatible(end, freq)]):
raise TypeError("start, end, freq need to be type compatible")

# +1 to convert interval count to breaks count (n breaks = n-1 intervals)
if periods is not None:
periods += 1

if is_number(endpoint):
# compute the period/start/end if unspecified (at most one)
if periods is None:
periods = int((end - start) // freq)

if start is None:
start = end - periods * freq

# force end to be consistent with freq (lower if freq skips over end)
end = start + periods * freq

# end + freq for inclusive endpoint
breaks = np.arange(start, end + freq, freq)
elif isinstance(endpoint, Timestamp):
# add one to account for interval endpoints (n breaks = n-1 intervals)
if periods is not None:
periods += 1
breaks = date_range(start=start, end=end, periods=periods, freq=freq)
periods = int((end - start) // freq) + 1
elif start is None:
start = end - (periods - 1) * freq
elif end is None:
end = start + (periods - 1) * freq

# force end to be consistent with freq (lower if freq skips end)
if freq is not None:
end -= end % freq

breaks = np.linspace(start, end, periods)
if all(is_integer(x) for x in com._not_none(start, end, freq)):
# np.linspace always produces float output
breaks = maybe_downcast_to_dtype(breaks, 'int64')
else:
# add one to account for interval endpoints (n breaks = n-1 intervals)
if periods is not None:
periods += 1
breaks = timedelta_range(start=start, end=end, periods=periods,
freq=freq)
# delegate to the appropriate range function
if isinstance(endpoint, Timestamp):
range_func = date_range
else:
range_func = timedelta_range

breaks = range_func(start=start, end=end, periods=periods, freq=freq)

return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
46 changes: 31 additions & 15 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,13 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
msg = 'periods must be a number, got {periods}'
raise TypeError(msg.format(periods=periods))

if data is None and freq is None:
raise ValueError("Must provide freq argument if no data is "
"supplied")

if data is None:
return cls._generate(start, end, periods, name, freq,
closed=closed)
if freq is None and com._any_none(periods, start, end):
msg = 'Must provide freq argument if no data is supplied'
raise ValueError(msg)
else:
return cls._generate(start, end, periods, name, freq,
closed=closed)

if unit is not None:
data = to_timedelta(data, unit=unit, box=False)
Expand Down Expand Up @@ -266,10 +266,10 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
return cls._simple_new(data, name=name, freq=freq)

@classmethod
def _generate(cls, start, end, periods, name, offset, closed=None):
if com._count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and '
'periods, exactly two must be specified')
def _generate(cls, start, end, periods, name, freq, closed=None):
if com._count_not_none(start, end, periods, freq) != 3:
raise ValueError('Of the four parameters: start, end, periods, '
'and freq, exactly three must be specified')

if start is not None:
start = Timedelta(start)
Expand All @@ -295,8 +295,11 @@ def _generate(cls, start, end, periods, name, offset, closed=None):
else:
raise ValueError("Closed has to be either 'left', 'right' or None")

index = _generate_regular_range(start, end, periods, offset)
index = cls._simple_new(index, name=name, freq=offset)
if freq is not None:
index = _generate_regular_range(start, end, periods, freq)
index = cls._simple_new(index, name=name, freq=freq)
else:
index = to_timedelta(np.linspace(start.value, end.value, periods))

if not left_closed:
index = index[1:]
Expand Down Expand Up @@ -1046,7 +1049,7 @@ def _generate_regular_range(start, end, periods, offset):
return data


def timedelta_range(start=None, end=None, periods=None, freq='D',
def timedelta_range(start=None, end=None, periods=None, freq=None,
name=None, closed=None):
"""
Return a fixed frequency TimedeltaIndex, with day as the default
Expand Down Expand Up @@ -1074,8 +1077,10 @@ def timedelta_range(start=None, end=None, periods=None, freq='D',

Notes
-----
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
must be specified.
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. If ``freq`` is omitted, the resulting
``TimedeltaIndex`` will have ``periods`` linearly spaced elements between
``start`` and ``end`` (closed on both sides).

To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
Expand All @@ -1102,6 +1107,17 @@ def timedelta_range(start=None, end=None, periods=None, freq='D',
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
'1 days 18:00:00', '2 days 00:00:00'],
dtype='timedelta64[ns]', freq='6H')

Specify ``start``, ``end``, and ``periods``; the frequency is generated
automatically (linearly spaced).

>>> pd.timedelta_range(start='1 day', end='5 days', periods=4)
TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
'5 days 00:00:00'],
dtype='timedelta64[ns]', freq=None)
"""
if freq is None and com._any_none(periods, start, end):
freq = 'D'

return TimedeltaIndex(start=start, end=end, periods=periods,
freq=freq, name=name, closed=closed)
4 changes: 4 additions & 0 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,10 @@ def test_constructor(self):
with tm.assert_raises_regex(TypeError, msg):
bdate_range('2011-1-1', '2012-1-1', 'B')

msg = 'freq must be specified for bdate_range; use date_range instead'
with tm.assert_raises_regex(TypeError, msg):
bdate_range(START, END, periods=10, freq=None)

def test_naive_aware_conflicts(self):
naive = bdate_range(START, END, freq=BDay(), tz=None)
aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong")
Expand Down
Loading