diff --git a/doc/source/api.rst b/doc/source/api.rst index 27a4ab9cc6cbc..1541bbccefe21 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -218,10 +218,19 @@ Top-level dealing with datetimelike to_timedelta date_range bdate_range + cdate_range period_range timedelta_range infer_freq +Top-level dealing with intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + interval_range + Top-level evaluation ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c86c58c3183f6..5422d5c53043d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1705,6 +1705,15 @@ has multiplied span. pd.PeriodIndex(start='2014-01', freq='3M', periods=4) +If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor +endpoints for a ``PeriodIndex`` with frequency matching that of the +``PeriodIndex`` constructor. + +.. ipython:: python + + pd.PeriodIndex(start=pd.Period('2017Q1', freq='Q'), + end=pd.Period('2017Q2', freq='Q'), freq='M') + Just like ``DatetimeIndex``, a ``PeriodIndex`` can also be used to index pandas objects: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9da1f321ef574..939199d3f6fa6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -218,7 +218,7 @@ Furthermore this will now correctly box the results of iteration for :func:`Data .. ipython:: ipython d = {'a':[1], 'b':['b']} - df = pd,DataFrame(d) + df = pd.DataFrame(d) Previously: @@ -358,6 +358,59 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. +.. 
_whatsnew_0210.api.consistency_of_range_functions: + +Consistency of Range Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`cdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range` (:issue:`17471`). + +One of the inconsistent behaviors occurred when the ``start``, ``end`` and ``periods`` parameters were all specified, potentially leading to ambiguous ranges. When all three parameters were passed, ``interval_range`` ignored the ``periods`` parameter, ``period_range`` ignored the ``end`` parameter, and the other range functions raised. To promote consistency among the range functions, and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range`` will now raise when all three parameters are passed. + +Previous Behavior: + +.. code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + Out[2]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + Out[3]: PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2'], dtype='period[Q-DEC]', freq='Q-DEC') + +New Behavior: + +.. code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + --------------------------------------------------------------------------- + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + --------------------------------------------------------------------------- + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified + +Additionally, the endpoint parameter ``end`` was not included in the intervals produced by ``interval_range``. However, all other range functions include ``end`` in their output.
To promote consistency among the range functions, ``interval_range`` will now include ``end`` as the right endpoint of the final interval, except if ``freq`` is specified in a way which skips ``end``. + +Previous Behavior: + +.. code-block:: ipython + + In [4]: pd.interval_range(start=0, end=4) + Out[4]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + +New Behavior: + + .. ipython:: python + + pd.interval_range(start=0, end=4) + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4cfb7547e7d0a..1c8d0b334b91c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -292,8 +292,8 @@ def __new__(cls, data=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -412,7 +412,8 @@ def __new__(cls, data=None, def _generate(cls, start, end, periods, name, offset, tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + raise ValueError('Of the three parameters: start, end, and ' + 'periods, exactly two must be specified') _normalized = True @@ -2004,7 +2005,7 @@ def _generate_regular_range(start, end, periods, offset): def date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with day (calendar) as the default + Return a fixed frequency DatetimeIndex, with day (calendar) as the default frequency Parameters @@ -2013,24 +2014,25 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Left bound for 
generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Hong_Kong normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2047,7 +2049,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with business day as the default + Return a fixed frequency DatetimeIndex, with business day as the default frequency Parameters @@ -2056,8 +2058,8 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'B' (business daily) Frequency strings can have multiples, e.g. 
'5H' tz : string or None @@ -2065,15 +2067,16 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2091,7 +2094,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - **EXPERIMENTAL** Return a fixed frequency datetime index, with + **EXPERIMENTAL** Return a fixed frequency DatetimeIndex, with CustomBusinessDay as the default frequency .. warning:: EXPERIMENTAL @@ -2105,29 +2108,30 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'C' (CustomBusinessDay) Frequency strings can have multiples, e.g. 
'5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - weekmask : str, Default 'Mon Tue Wed Thu Fri' + name : string, default None + Name of the resulting DatetimeIndex + weekmask : string, Default 'Mon Tue Wed Thu Fri' weekmask of valid business days, passed to ``numpy.busdaycalendar`` holidays : list list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` - closed : string or None, default None + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. 
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e0ed6c7ea35c0..6e80f6c900386 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -15,6 +15,8 @@ is_float_dtype, is_interval_dtype, is_scalar, + is_float, + is_number, is_integer) from pandas.core.indexes.base import ( Index, _ensure_index, @@ -25,11 +27,15 @@ Interval, IntervalMixin, IntervalTree, intervals_to_interval_bounds) +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option +from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import DateOffset import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -1028,54 +1034,152 @@ def func(self, other): IntervalIndex._add_logical_methods_disabled() -def interval_range(start=None, end=None, freq=None, periods=None, - name=None, closed='right', **kwargs): +def _is_valid_endpoint(endpoint): + """helper for interval_range to check if start/end are valid types""" + return any([is_number(endpoint), + isinstance(endpoint, Timestamp), + isinstance(endpoint, Timedelta), + endpoint is None]) + + +def _is_type_compatible(a, b): + """helper for interval_range to check type compat of start/end/freq""" + is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) + is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) + return ((is_number(a) and is_number(b)) or + (is_ts_compat(a) and is_ts_compat(b)) or + (is_td_compat(a) and is_td_compat(b)) or + com._any_none(a, b)) + + +def interval_range(start=None, end=None, periods=None, freq=None, + name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- - start : string 
or datetime-like, default None - Left bound for generating data - end : string or datetime-like, default None - Right bound for generating data - freq : interger, string or DateOffset, default 1 - periods : interger, default None - name : str, default None - Name of the resulting index + start : numeric or datetime-like, default None + Left bound for generating intervals + end : numeric or datetime-like, default None + Right bound for generating intervals + periods : integer, default None + Number of periods to generate + freq : numeric, string, or DateOffset, default None + The length of each interval. Must be consistent with the type of start + and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 + for numeric and 'D' (calendar daily) for datetime-like. + name : string, default None + Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. Returns ------- rng : IntervalIndex + + Examples + -------- + + Numeric ``start`` and ``end`` are supported. + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] + closed='right', dtype='interval[int64]') + + Additionally, datetime-like input is also supported. + + >>> pd.interval_range(start='2017-01-01', end='2017-01-04') + IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], + (2017-01-03, 2017-01-04]] + closed='right', dtype='interval[datetime64[ns]]') + + The ``freq`` parameter specifies the frequency between the left and right + endpoints of the individual intervals within the ``IntervalIndex``. For + numeric ``start`` and ``end``, the frequency must also be numeric.
+ + >>> pd.interval_range(start=0, periods=4, freq=1.5) + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] + closed='right', dtype='interval[float64]') + + Similarly, for datetime-like ``start`` and ``end``, the frequency must be + convertible to a DateOffset. + + >>> pd.interval_range(start='2017-01-01', periods=3, freq='MS') + IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], + (2017-03-01, 2017-04-01]] + closed='right', dtype='interval[datetime64[ns]]') + + The ``closed`` parameter specifies which endpoints of the individual + intervals within the ``IntervalIndex`` are closed. + + >>> pd.interval_range(end=5, periods=4, closed='both') + IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] + closed='both', dtype='interval[int64]') """ + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + start = com._maybe_box_datetimelike(start) + end = com._maybe_box_datetimelike(end) + endpoint = next(com._not_none(start, end)) + + if not _is_valid_endpoint(start): + msg = 'start must be numeric or datetime-like, got {start}' + raise ValueError(msg.format(start=start)) + + if not _is_valid_endpoint(end): + msg = 'end must be numeric or datetime-like, got {end}' + raise ValueError(msg.format(end=end)) + + if is_float(periods): + periods = int(periods) + elif not is_integer(periods) and periods is not None: + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) + + freq = freq or (1 if is_number(endpoint) else 'D') + if not is_number(freq): + try: + freq = to_offset(freq) + except ValueError: + raise ValueError('freq must be numeric or convertible to ' + 'DateOffset, got {freq}'.format(freq=freq)) - if freq is None: - freq = 1 + # verify type compatibility + if not all([_is_type_compatible(start, end), + _is_type_compatible(start, freq), + _is_type_compatible(end, freq)]): + raise TypeError("start, end, 
freq need to be type compatible") - if start is None: - if periods is None or end is None: - raise ValueError("must specify 2 of start, end, periods") - start = end - periods * freq - if end is None: - if periods is None or start is None: - raise ValueError("must specify 2 of start, end, periods") + if is_number(endpoint): + if periods is None: + periods = int((end - start) // freq) + + if start is None: + start = end - periods * freq + + # force end to be consistent with freq (lower if freq skips over end) end = start + periods * freq - if periods is None: - if start is None or end is None: - raise ValueError("must specify 2 of start, end, periods") - pass - - # must all be same units or None - arr = np.array([start, end, freq]) - if is_object_dtype(arr): - raise ValueError("start, end, freq need to be the same type") - - return IntervalIndex.from_breaks(np.arange(start, end, freq), - name=name, - closed=closed) + + # end + freq for inclusive endpoint + breaks = np.arange(start, end + freq, freq) + elif isinstance(endpoint, Timestamp): + # add one to account for interval endpoints (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + breaks = date_range(start=start, end=end, periods=periods, freq=freq) + else: + # add one to account for interval endpoints (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + breaks = timedelta_range(start=start, end=end, periods=periods, + freq=freq) + + return IntervalIndex.from_breaks(breaks, name=name, closed=closed) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0915462d4d421..fb47d1db48610 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -199,8 +199,8 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise 
TypeError(msg.format(periods=periods)) if name is None and hasattr(data, 'name'): name = data.name @@ -1051,8 +1051,9 @@ def tz_localize(self, tz, infer_dst=False): def _get_ordinal_range(start, end, periods, freq, mult=1): - if com._count_not_none(start, end, periods) < 2: - raise ValueError('Must specify 2 of start, end, periods') + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') if freq is not None: _, mult = _gfc(freq) @@ -1066,9 +1067,9 @@ def _get_ordinal_range(start, end, periods, freq, mult=1): is_end_per = isinstance(end, Period) if is_start_per and is_end_per and start.freq != end.freq: - raise ValueError('Start and end must have same freq') + raise ValueError('start and end must have same freq') if (start is tslib.NaT or end is tslib.NaT): - raise ValueError('Start and end must not be NaT') + raise ValueError('start and end must not be NaT') if freq is None: if is_start_per: @@ -1157,24 +1158,55 @@ def pnow(freq=None): def period_range(start=None, end=None, periods=None, freq='D', name=None): """ - Return a fixed frequency datetime index, with day (calendar) as the default + Return a fixed frequency PeriodIndex, with day (calendar) as the default frequency - Parameters ---------- - start : starting value, period-like, optional - end : ending value, period-like, optional - periods : int, default None - Number of periods in the index - freq : str/DateOffset, default 'D' + start : string or period-like, default None + Left bound for generating periods + end : string or period-like, default None + Right bound for generating periods + periods : integer, default None + Number of periods to generate + freq : string or DateOffset, default 'D' (calendar daily) Frequency alias - name : str, default None - Name for the resulting PeriodIndex + name : string, default None + Name of the resulting PeriodIndex + + Notes + ----- + Of the three parameters: 
``start``, ``end``, and ``periods``, exactly two + must be specified. + + To learn more about the frequency strings, please see `this link + `__. Returns ------- prng : PeriodIndex + + Examples + -------- + + >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') + PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', + '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', + '2017-11', '2017-12', '2018-01'], + dtype='period[M]', freq='M') + + If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor + endpoints for a ``PeriodIndex`` with frequency matching that of the + ``period_range`` constructor. + + >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), + ... end=pd.Period('2017Q2', freq='Q'), freq='M') + PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], + dtype='period[M]', freq='M') """ + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + return PeriodIndex(start=start, end=end, periods=periods, freq=freq, name=name) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2823951c0f348..d7b7d56d74a3a 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -180,8 +180,8 @@ def __new__(cls, data=None, unit=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -234,7 +234,8 @@ def __new__(cls, data=None, unit=None, @classmethod def _generate(cls, start, end, periods, name, offset, closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + raise ValueError('Of the
three parameters: start, end, and ' + 'periods, exactly two must be specified') if start is not None: start = Timedelta(start) @@ -960,22 +961,22 @@ def _generate_regular_range(start, end, periods, offset): def timedelta_range(start=None, end=None, periods=None, freq='D', name=None, closed=None): """ - Return a fixed frequency timedelta index, with day as the default + Return a fixed frequency TimedeltaIndex, with day as the default frequency Parameters ---------- start : string or timedelta-like, default None - Left bound for generating dates - end : string or datetime-like, default None - Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + Left bound for generating timedeltas + end : string or timedelta-like, default None + Right bound for generating timedeltas + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting TimedeltaIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) @@ -985,11 +986,34 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', Notes ----- - 2 of start, end, or periods must be specified. + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. + + Examples + -------- + + >>> pd.timedelta_range(start='1 day', periods=4) + TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``closed`` parameter specifies which endpoint is included. The default + behavior is to include both endpoints. 
+ + >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + TimedeltaIndex(['2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``freq`` parameter specifies the frequency of the TimedeltaIndex. + Only fixed frequencies can be passed, non-fixed frequencies such as + 'M' (month end) will raise. + + >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H') + TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', + '1 days 18:00:00', '2 days 00:00:00'], + dtype='timedelta64[ns]', freq='6H') """ return TimedeltaIndex(start=start, end=end, periods=periods, - freq=freq, name=name, - closed=closed) + freq=freq, name=name, closed=closed) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index cf896b06130a2..a4706dd8a3767 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -307,8 +307,9 @@ def test_constructor_coverage(self): exp = date_range('1/1/2000', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + DatetimeIndex(start='1/1/2000', periods='foo', freq='D') pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', end='1/10/2000') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index da4ca83c10dda..8d86bebdd4d5e 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -107,8 +107,10 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - pytest.raises(ValueError, date_range, start, end, freq='s', - periods=10) + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be 
specified') + with tm.assert_raises_regex(ValueError, msg): + date_range(start, end, periods=10, freq='s') def test_date_range_businesshour(self): idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', @@ -146,14 +148,29 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000') + + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000') + + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10) + + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, '1/1/2000') - pytest.raises(ValueError, date_range, end='1/1/2000') - pytest.raises(ValueError, date_range, periods=10) + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, '1/1/2000', freq='H') - pytest.raises(ValueError, date_range, end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, periods=10, freq='H') + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10, freq='H') + + with tm.assert_raises_regex(ValueError, msg): + date_range() def test_compat_replace(self): # https://github.com/statsmodels/statsmodels/issues/3349 @@ -231,8 +248,13 @@ def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) bdate_range(end=START, periods=20, freq=BDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - pytest.raises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + + msg = 'periods must be a number, got B' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'B') + + with tm.assert_raises_regex(TypeError, msg): + bdate_range('2011-1-1', '2012-1-1', 'B') def test_naive_aware_conflicts(self): naive = 
bdate_range(START, END, freq=BDay(), tz=None) @@ -510,8 +532,13 @@ def test_constructor(self): cdate_range(START, END, freq=CDay()) cdate_range(START, periods=20, freq=CDay()) cdate_range(end=START, periods=20, freq=CDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - pytest.raises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + + msg = 'periods must be a number, got C' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'C') + + with tm.assert_raises_regex(TypeError, msg): + cdate_range('2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index e5b889e100307..639a9272c3808 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -436,11 +436,12 @@ def test_constructor_error(self): start = Period('02-Apr-2005', 'B') end_intv = Period('2006-12-31', ('w', 1)) - msg = 'Start and end must have same freq' + msg = 'start and end must have same freq' with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) - msg = 'Must specify 2 of start, end, periods' + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 0000000000000..640f24f67f72f --- /dev/null +++ b/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,94 @@ +import pytest +import pandas.util.testing as tm +from pandas import date_range, NaT, period_range, Period, PeriodIndex + + +class TestPeriodRange(object): + + @pytest.mark.parametrize('freq', ['D', 'W', 'M', 'Q', 'A']) + def test_construction_from_string(self, freq): + # non-empty + 
expected = date_range(start='2017-01-01', periods=5, + freq=freq, name='foo').to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name='foo') + + result = period_range(start=start, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + def test_construction_from_period(self): + # upsampling + start, end = Period('2017Q1', freq='Q'), Period('2018Q1', freq='Q') + expected = date_range(start='2017-03-31', end='2018-03-31', freq='M', + name='foo').to_period() + result = period_range(start=start, end=end, freq='M', name='foo') + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period('2017-1', freq='M'), Period('2019-12', freq='M') + expected = date_range(start='2017-01-31', end='2019-12-31', freq='Q', + name='foo').to_period() + result = period_range(start=start, end=end, freq='Q', name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq='W', name='foo') + + result = period_range(start=start, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ('Of the three 
parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(end='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(periods=5) + + with tm.assert_raises_regex(ValueError, msg): + period_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') + + # start/end NaT + msg = 'start and end must not be NaT' + with tm.assert_raises_regex(ValueError, msg): + period_range(start=NaT, end='2018Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end=NaT) + + # invalid periods param + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + period_range(start='2017Q1', periods='foo') diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 18eefc3fbdca6..13c3b35e4d85d 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -2,10 +2,11 @@ import pytest import numpy as np - +from datetime import timedelta from pandas import (Interval, IntervalIndex, Index, isna, interval_range, Timestamp, Timedelta, - compat) + compat, date_range, timedelta_range, DateOffset) +from pandas.tseries.offsets import Day from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base import pandas.util.testing as tm @@ -721,40 +722,278 @@ def test_is_non_overlapping_monotonic(self): class TestIntervalRange(object): - def test_construction(self): - result = interval_range(0, 5, name='foo', closed='both') + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_numeric(self, closed): + # combinations of start/end/periods without freq expected = IntervalIndex.from_breaks( - np.arange(0, 5), name='foo', closed='both') + 
np.arange(0, 6), name='foo', closed=closed) + + result = interval_range(start=0, end=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) - def test_errors(self): + result = interval_range(start=0, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=5, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with freq + expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], + name='foo', closed=closed) + + result = interval_range(start=0, end=6, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=3, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=6, periods=3, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], + name='foo', closed=closed) + result = interval_range(start=0, end=4, freq=1.5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timestamp(self, closed): + # combinations of start/end/periods without freq + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06') + breaks = date_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq 
= '2D' + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timestamp('2017-01-08') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with non-fixed freq + freq = 'M' + start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ end = Timestamp('2018-01-15') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timedelta(self, closed): + # combinations of start/end/periods without freq + start, end = Timedelta('1 day'), Timedelta('6 days') + breaks = timedelta_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timedelta('1 day'), Timedelta('7 days') + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ end = Timedelta('7 days 1 hour') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + # float value for periods + expected = pd.interval_range(start=0, periods=10) + result = pd.interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pydatetime(), + end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.tz_localize('UTC'), + end=end.tz_localize('UTC')) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), + DateOffset(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pytimedelta(), + end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + def test_errors(self): # not enough params - def f(): - interval_range(0) + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') - pytest.raises(ValueError, f) + with 
tm.assert_raises_regex(ValueError, msg): + interval_range(start=0) - def f(): - interval_range(periods=2) + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=5) - pytest.raises(ValueError, f) + with tm.assert_raises_regex(ValueError, msg): + interval_range(periods=2) - def f(): + with tm.assert_raises_regex(ValueError, msg): interval_range() - pytest.raises(ValueError, f) + # too many params + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=5, periods=6) # mixed units - def f(): - interval_range(0, Timestamp('20130101'), freq=2) + msg = 'start, end, freq need to be type compatible' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timestamp('20130101'), freq=2) - pytest.raises(ValueError, f) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timedelta('1 day'), freq=2) - def f(): - interval_range(0, 10, freq=Timedelta('1day')) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=10, freq='D') - pytest.raises(ValueError, f) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timedelta('1 day'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timestamp('20130110'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timestamp('20130110'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timedelta('10 days'), freq=2) + + # invalid periods + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, periods='foo') + + # invalid start + msg = 'start must be 
numeric or datetime-like, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start='foo', periods=10) + + # invalid end + msg = r'end must be numeric or datetime-like, got \(0, 1\]' + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq (numeric and datetime-like endpoints) + msg = 'freq must be numeric or convertible to DateOffset, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=Timestamp('20130101'), periods=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Timedelta('1 day'), periods=10, freq='foo') class TestIntervalTree(object): diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index dd25e2cca2e55..70aadd9f57174 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -50,8 +50,9 @@ def test_constructor_coverage(self): exp = timedelta_range('1 days', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, TimedeltaIndex, start='1 days', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + TimedeltaIndex(start='1 days', periods='foo', freq='D') pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 4732a0ce110de..7624e1f79af15 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,5 +1,4 @@ import numpy as np - import pandas as pd import pandas.util.testing as tm from pandas.tseries.offsets import Day, Second @@ -49,3 +48,23 @@ def test_timedelta_range(self): expected =
df.loc[pd.Timedelta('0s'):, :] result = df.loc['0s':, :] assert_frame_equal(expected, result) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(end='5 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(periods=2) + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days', end='5 days', periods=10)