Skip to content

Commit d34b82c

Browse files
committed
Review and datetime-like implementation
Made review related changes, implemented support for datetime-like input in interval_range.
1 parent 234fd67 commit d34b82c

File tree

10 files changed

+284
-49
lines changed

10 files changed

+284
-49
lines changed

doc/source/whatsnew/v0.21.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -339,11 +339,11 @@ New Behavior:
339339

340340
In [2]: pd.interval_range(start=0, end=4, periods=6)
341341
---------------------------------------------------------------------------
342-
ValueError: Of the three parameters, start, end, and periods, exactly two must be specified
342+
ValueError: Of the three parameters: start, end, and periods, exactly two must be specified
343343

344344
In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q')
345345
---------------------------------------------------------------------------
346-
ValueError: Of the three parameters, start, end, and periods, exactly two must be specified
346+
ValueError: Of the three parameters: start, end, and periods, exactly two must be specified
347347

348348
Additionally, the endpoint parameter ``end`` was not included in the intervals produced by ``interval_range``. However, all other range functions include ``end`` in their output. To promote consistency among the range functions, ``interval_range`` will now include ``end`` as the right endpoint of the final interval, except if ``freq`` is specified in a way which skips ``end``.
349349

pandas/core/indexes/datetimes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def __new__(cls, data=None,
293293
periods = int(periods)
294294
elif not is_integer(periods):
295295
msg = 'periods must be a number, got {periods}'
296-
raise ValueError(msg.format(periods=periods))
296+
raise TypeError(msg.format(periods=periods))
297297

298298
if data is None and freq is None:
299299
raise ValueError("Must provide freq argument if no data is "
@@ -2061,7 +2061,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
20612061
periods : integer, default None
20622062
Number of dates to generate
20632063
freq : string or DateOffset, default 'B' (business daily)
2064-
Frequency strings can have multiples, e.g. '5, default
2064+
Frequency strings can have multiples, e.g. '5H'
20652065
tz : string or None
20662066
Time zone name for returning localized DatetimeIndex, for example
20672067
Asia/Beijing
@@ -2109,7 +2109,7 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
21092109
end : string or datetime-like, default None
21102110
Right bound for generating dates
21112111
periods : integer, default None
2112-
Number of dates to generate
2112+
Number of periods to generate
21132113
freq : string or DateOffset, default 'C' (CustomBusinessDay)
21142114
Frequency strings can have multiples, e.g. '5H'
21152115
tz : string, default None

pandas/core/indexes/interval.py

+104-24
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
is_interval_dtype,
1717
is_scalar,
1818
is_float,
19+
is_number,
1920
is_integer)
2021
from pandas.core.indexes.base import (
2122
Index, _ensure_index,
@@ -26,11 +27,14 @@
2627
Interval, IntervalMixin, IntervalTree,
2728
intervals_to_interval_bounds)
2829

30+
from pandas.core.indexes.datetimes import date_range
2931
from pandas.core.indexes.multi import MultiIndex
3032
from pandas.compat.numpy import function as nv
3133
from pandas.core import common as com
3234
from pandas.util._decorators import cache_readonly, Appender
3335
from pandas.core.config import get_option
36+
from pandas.tseries.offsets import DateOffset
37+
from pandas.tseries.frequencies import to_offset
3438

3539
import pandas.core.indexes.base as ibase
3640
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
@@ -1030,21 +1034,22 @@ def func(self, other):
10301034

10311035

10321036
def interval_range(start=None, end=None, periods=None, freq=None,
1033-
name=None, closed='right', **kwargs):
1037+
name=None, closed='right'):
10341038
"""
10351039
Return a fixed frequency IntervalIndex
10361040
10371041
Parameters
10381042
----------
1039-
start : numeric, string, or datetime-like, default None
1043+
start : numeric or datetime-like, default None
10401044
Left bound for generating intervals
1041-
end : numeric, string, or datetime-like, default None
1045+
end : numeric or datetime-like, default None
10421046
Right bound for generating intervals
10431047
periods : integer, default None
1044-
Number of intervals to generate
1045-
freq : numeric, string, or DateOffset, default 1
1046-
The length of each interval. Must be consistent with the
1047-
type of start and end
1048+
Number of periods to generate
1049+
freq : numeric, string, or DateOffset, default None
1050+
The length of each interval. Must be consistent with the type of start
1051+
and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1
1052+
for numeric and 'D' (calendar daily) for datetime-like.
10481053
name : string, default None
10491054
Name of the resulting IntervalIndex
10501055
closed : string, default 'right'
@@ -1058,32 +1063,107 @@ def interval_range(start=None, end=None, periods=None, freq=None,
10581063
Returns
10591064
-------
10601065
rng : IntervalIndex
1066+
1067+
Examples
1068+
--------
1069+
1070+
Numeric ``start`` and ``end`` are supported.
1071+
1072+
>>> pd.interval_range(start=0, end=5)
1073+
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
1074+
closed='right', dtype='interval[int64]')
1075+
1076+
Datetime-like input is also supported.
1077+
1078+
>>> pd.interval_range(start='2017-01-01', end='2017-01-04')
1079+
IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
1080+
(2017-01-03, 2017-01-04]]
1081+
closed='right', dtype='interval[datetime64[ns]]')
1082+
1083+
The ``freq`` parameter specifies the frequency between the left and right
1084+
endpoints of the individual intervals within the ``IntervalIndex``. For
1085+
numeric ``start`` and ``end``, the frequency must also be numeric.
1086+
1087+
>>> pd.interval_range(start=0, periods=4, freq=1.5)
1088+
IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
1089+
closed='right', dtype='interval[float64]')
1090+
1091+
Similarly, for datetime-like ``start`` and ``end``, the frequency must be
1092+
convertible to a DateOffset.
1093+
1094+
>>> pd.interval_range(start='2017-01-01', periods=3, freq='MS')
1095+
IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
1096+
(2017-03-01, 2017-04-01]]
1097+
closed='right', dtype='interval[datetime64[ns]]')
1098+
1099+
The ``closed`` parameter specifies which endpoints of the individual
1100+
intervals within the ``IntervalIndex`` are closed.
1101+
1102+
>>> pd.interval_range(end=5, periods=4, closed='both')
1103+
IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
1104+
closed='both', dtype='interval[int64]')
10611105
"""
10621106
if com._count_not_none(start, end, periods) != 2:
10631107
raise ValueError('Of the three parameters: start, end, and periods, '
10641108
'exactly two must be specified')
10651109

1066-
# must all be same units or None
1067-
arr = np.array(list(com._not_none(start, end, freq)))
1068-
if is_object_dtype(arr):
1069-
raise ValueError("start, end, freq need to be the same type")
1110+
# assume datetime-like unless we find numeric start or end
1111+
is_datetime_interval = True
1112+
1113+
if is_number(start):
1114+
is_datetime_interval = False
1115+
elif start is not None:
1116+
try:
1117+
start = Timestamp(start)
1118+
except (TypeError, ValueError):
1119+
raise ValueError('start must be numeric or datetime-like')
10701120

1071-
if freq is None:
1072-
freq = 1
1121+
if is_number(end):
1122+
is_datetime_interval = False
1123+
elif end is not None:
1124+
try:
1125+
end = Timestamp(end)
1126+
except (TypeError, ValueError):
1127+
raise ValueError('end must be numeric or datetime-like')
10731128

1074-
if periods is None:
1075-
periods = int((end - start) // freq)
1076-
elif is_float(periods):
1129+
if is_float(periods):
10771130
periods = int(periods)
1078-
elif not is_integer(periods):
1131+
elif not is_integer(periods) and periods is not None:
10791132
msg = 'periods must be a number, got {periods}'
1080-
raise ValueError(msg.format(periods=periods))
1133+
raise TypeError(msg.format(periods=periods))
10811134

1082-
if start is None:
1083-
start = end - periods * freq
1135+
if is_datetime_interval:
1136+
freq = freq or 'D'
1137+
if not isinstance(freq, DateOffset):
1138+
try:
1139+
freq = to_offset(freq)
1140+
except ValueError:
1141+
raise ValueError('freq must be convertible to DateOffset when '
1142+
'start/end are datetime-like')
1143+
else:
1144+
freq = freq or 1
10841145

1085-
# force end to be consistent with freq (truncate if freq skips over end)
1086-
end = start + periods * freq
1146+
# verify type compatibility
1147+
is_numeric_interval = all(map(is_number, com._not_none(start, end, freq)))
1148+
if not is_datetime_interval and not is_numeric_interval:
1149+
raise TypeError("start, end, freq need to be type compatible")
1150+
1151+
if is_numeric_interval:
1152+
if periods is None:
1153+
periods = int((end - start) // freq)
1154+
1155+
if start is None:
1156+
start = end - periods * freq
1157+
1158+
# force end to be consistent with freq (lower if freq skips over end)
1159+
end = start + periods * freq
1160+
1161+
# end + freq for inclusive endpoint
1162+
breaks = np.arange(start, end + freq, freq)
1163+
else:
1164+
# add one to account for interval endpoints (n breaks = n-1 intervals)
1165+
if periods is not None:
1166+
periods += 1
1167+
breaks = date_range(start=start, end=end, periods=periods, freq=freq)
10871168

1088-
return IntervalIndex.from_breaks(np.arange(start, end + freq, freq),
1089-
name=name, closed=closed, **kwargs)
1169+
return IntervalIndex.from_breaks(breaks, name=name, closed=closed)

pandas/core/indexes/period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
200200
periods = int(periods)
201201
elif not is_integer(periods):
202202
msg = 'periods must be a number, got {periods}'
203-
raise ValueError(msg.format(periods=periods))
203+
raise TypeError(msg.format(periods=periods))
204204

205205
if name is None and hasattr(data, 'name'):
206206
name = data.name

pandas/core/indexes/timedeltas.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def __new__(cls, data=None, unit=None,
181181
periods = int(periods)
182182
elif not is_integer(periods):
183183
msg = 'periods must be a number, got {periods}'
184-
raise ValueError(msg.format(periods=periods))
184+
raise TypeError(msg.format(periods=periods))
185185

186186
if data is None and freq is None:
187187
raise ValueError("Must provide freq argument if no data is "
@@ -971,7 +971,7 @@ def timedelta_range(start=None, end=None, periods=None, freq='D',
971971
end : string or timedelta-like, default None
972972
Right bound for generating timedeltas
973973
periods : integer, default None
974-
Number of timedeltas to generate
974+
Number of periods to generate
975975
freq : string or DateOffset, default 'D' (calendar daily)
976976
Frequency strings can have multiples, e.g. '5H'
977977
name : string, default None
@@ -991,6 +991,29 @@ def timedelta_range(start=None, end=None, periods=None, freq='D',
991991
992992
To learn more about the frequency strings, please see `this link
993993
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
994+
995+
Examples
996+
--------
997+
998+
>>> pd.timedelta_range(start='1 day', periods=4)
999+
TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'],
1000+
dtype='timedelta64[ns]', freq='D')
1001+
1002+
The ``closed`` parameter specifies which endpoint is included. The default
1003+
behavior is to include both endpoints.
1004+
1005+
>>> pd.timedelta_range(start='1 day', periods=4, closed='right')
1006+
TimedeltaIndex(['2 days', '3 days', '4 days'],
1007+
dtype='timedelta64[ns]', freq='D')
1008+
1009+
The ``freq`` parameter specifies the frequency of the TimedeltaIndex.
1010+
Only fixed frequencies can be passed; non-fixed frequencies, such as
1011+
'M' (month end) will raise.
1012+
1013+
>>> pd.timedelta_range(start='1 day', end='2 days', freq='6H')
1014+
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
1015+
'1 days 18:00:00', '2 days 00:00:00'],
1016+
dtype='timedelta64[ns]', freq='6H')
9941017
"""
9951018
return TimedeltaIndex(start=start, end=end, periods=periods,
9961019
freq=freq, name=name, closed=closed)

pandas/tests/indexes/datetimes/test_construction.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,9 @@ def test_constructor_coverage(self):
307307
exp = date_range('1/1/2000', periods=10)
308308
tm.assert_index_equal(rng, exp)
309309

310-
pytest.raises(ValueError, DatetimeIndex, start='1/1/2000',
311-
periods='foo', freq='D')
310+
msg = 'periods must be a number, got foo'
311+
with tm.assert_raises_regex(TypeError, msg):
312+
DatetimeIndex(start='1/1/2000', periods='foo', freq='D')
312313

313314
pytest.raises(ValueError, DatetimeIndex, start='1/1/2000',
314315
end='1/10/2000')

pandas/tests/indexes/datetimes/test_date_range.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -248,8 +248,13 @@ def test_constructor(self):
248248
bdate_range(START, END, freq=BDay())
249249
bdate_range(START, periods=20, freq=BDay())
250250
bdate_range(end=START, periods=20, freq=BDay())
251-
pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B')
252-
pytest.raises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B')
251+
252+
msg = 'periods must be a number, got B'
253+
with tm.assert_raises_regex(TypeError, msg):
254+
date_range('2011-1-1', '2012-1-1', 'B')
255+
256+
with tm.assert_raises_regex(TypeError, msg):
257+
bdate_range('2011-1-1', '2012-1-1', 'B')
253258

254259
def test_naive_aware_conflicts(self):
255260
naive = bdate_range(START, END, freq=BDay(), tz=None)
@@ -527,8 +532,13 @@ def test_constructor(self):
527532
cdate_range(START, END, freq=CDay())
528533
cdate_range(START, periods=20, freq=CDay())
529534
cdate_range(end=START, periods=20, freq=CDay())
530-
pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C')
531-
pytest.raises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C')
535+
536+
msg = 'periods must be a number, got C'
537+
with tm.assert_raises_regex(TypeError, msg):
538+
date_range('2011-1-1', '2012-1-1', 'C')
539+
540+
with tm.assert_raises_regex(TypeError, msg):
541+
cdate_range('2011-1-1', '2012-1-1', 'C')
532542

533543
def test_cached_range(self):
534544
DatetimeIndex._cached_range(START, END, offset=CDay())

pandas/tests/indexes/period/test_period_range.py

+5
Original file line numberDiff line numberDiff line change
@@ -87,3 +87,8 @@ def test_errors(self):
8787

8888
with tm.assert_raises_regex(ValueError, msg):
8989
period_range(start='2017Q1', end=NaT)
90+
91+
# invalid periods param
92+
msg = 'periods must be a number, got foo'
93+
with tm.assert_raises_regex(TypeError, msg):
94+
period_range(start='2017Q1', periods='foo')

0 commit comments

Comments
 (0)