From 0d9355023035a9da5d272c8d5c2dcce2e7b378e9 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 8 Sep 2017 22:42:45 -0600 Subject: [PATCH 1/7] Make *_range functions consistent --- doc/source/whatsnew/v0.21.0.txt | 53 +++++++++++++++ pandas/core/indexes/datetimes.py | 9 +-- pandas/core/indexes/interval.py | 22 ++----- pandas/core/indexes/period.py | 12 +++- pandas/core/indexes/timedeltas.py | 5 +- .../indexes/datetimes/test_date_range.py | 28 +++++--- .../tests/indexes/period/test_construction.py | 2 +- .../tests/indexes/period/test_period_range.py | 52 +++++++++++++++ pandas/tests/indexes/test_interval.py | 66 +++++++++++++------ .../timedeltas/test_timedelta_range.py | 20 +++++- 10 files changed, 215 insertions(+), 54 deletions(-) create mode 100644 pandas/tests/indexes/period/test_period_range.py diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9da1f321ef574..6c3df0400c9a3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -358,6 +358,59 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. +.. _whatsnew_0200.api.consistency_of_range_functions: + +Consistency of Range Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, there were some inconsistencies between the various range functions: ``date_range``, ``bdate_range``, ``cdate_range``, ``interval_range``, ``period_range``, and ``timedelta_range``. (:issue:`17471`). + +One of the inconsistent behaviors occurred when the ``start``, ``end`` and ``period`` parameters were all specified, potentially leading to ambiguous ranges. 
When all three parameters were passed, ``interval_range`` ignored the ``period`` parameter, ``period_range`` ignored the ``end`` parameter, and the other range functions raised. To promote consistency among the range functions, and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range`` will now raise when all three parameters are passed. + +Previous Behavior: + +.. code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + Out[2]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + Out[3]: PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2'], dtype='period[Q-DEC]', freq='Q-DEC') + +New Behavior: + +.. code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + --------------------------------------------------------------------------- + ValueError: Must specify exactly two of start, end, or periods + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + --------------------------------------------------------------------------- + ValueError: Must specify exactly two of start, end, or periods + +Additionally, the endpoint parameter ``end`` was not included in the intervals produced by ``interval_range``. However, all other range functions include ``end`` in their output. To promote consistency among the range functions, ``interval_range`` will now include ``end`` as the right endpoint of the final interval, except if ``freq`` is specified in a way which skips ``end``. + +Previous Behavior: + +.. code-block:: ipython + + In [4]: pd.interval_range(start=0, end=4) + Out[4]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + +New Behavior: + + .. ipython:: python + + pd.interval_range(start=0, end=4) + .. 
_whatsnew_0210.api: Other API Changes diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4cfb7547e7d0a..c5a84cfdb4d9d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -412,7 +412,8 @@ def __new__(cls, data=None, def _generate(cls, start, end, periods, name, offset, tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + msg = 'Must specify exactly two of start, end, or periods' + raise ValueError(msg) _normalized = True @@ -2030,7 +2031,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Notes ----- - 2 of start, end, or periods must be specified + Exactly two of start, end, or periods must be specified To learn more about the frequency strings, please see `this link `__. @@ -2073,7 +2074,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Notes ----- - 2 of start, end, or periods must be specified + Exactly two of start, end, or periods must be specified To learn more about the frequency strings, please see `this link `__. @@ -2127,7 +2128,7 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Notes ----- - 2 of start, end, or periods must be specified + Exactly two of start, end, or periods must be specified To learn more about the frequency strings, please see `this link `__. 
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e0ed6c7ea35c0..5d84063e649da 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1039,8 +1039,8 @@ def interval_range(start=None, end=None, freq=None, periods=None, Left bound for generating data end : string or datetime-like, default None Right bound for generating data - freq : interger, string or DateOffset, default 1 - periods : interger, default None + freq : integer, string or DateOffset, default 1 + periods : integer, default None name : str, default None Name of the resulting index closed : string, default 'right' @@ -1048,34 +1048,26 @@ def interval_range(start=None, end=None, freq=None, periods=None, Notes ----- - 2 of start, end, or periods must be specified + Exactly two of start, end, or periods must be specified Returns ------- rng : IntervalIndex """ + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Must specify exactly two of start, end, or periods') if freq is None: freq = 1 - if start is None: - if periods is None or end is None: - raise ValueError("must specify 2 of start, end, periods") start = end - periods * freq if end is None: - if periods is None or start is None: - raise ValueError("must specify 2 of start, end, periods") end = start + periods * freq - if periods is None: - if start is None or end is None: - raise ValueError("must specify 2 of start, end, periods") - pass # must all be same units or None arr = np.array([start, end, freq]) if is_object_dtype(arr): raise ValueError("start, end, freq need to be the same type") - return IntervalIndex.from_breaks(np.arange(start, end, freq), - name=name, - closed=closed) + return IntervalIndex.from_breaks(np.arange(start, end + 1, freq), + name=name, closed=closed, **kwargs) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0915462d4d421..04c1771138921 100644 --- a/pandas/core/indexes/period.py +++ 
b/pandas/core/indexes/period.py @@ -1051,8 +1051,8 @@ def tz_localize(self, tz, infer_dst=False): def _get_ordinal_range(start, end, periods, freq, mult=1): - if com._count_not_none(start, end, periods) < 2: - raise ValueError('Must specify 2 of start, end, periods') + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Must specify exactly two of start, end, or periods') if freq is not None: _, mult = _gfc(freq) @@ -1160,7 +1160,6 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): Return a fixed frequency datetime index, with day (calendar) as the default frequency - Parameters ---------- start : starting value, period-like, optional @@ -1172,6 +1171,13 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): name : str, default None Name for the resulting PeriodIndex + Notes + ----- + Exactly two of start, end, or periods must be specified + + To learn more about the frequency strings, please see `this link + `__. + Returns ------- prng : PeriodIndex diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2823951c0f348..38177fabd1e72 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -234,7 +234,8 @@ def __new__(cls, data=None, unit=None, @classmethod def _generate(cls, start, end, periods, name, offset, closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + msg = 'Must specify exactly two of start, end, or periods' + raise ValueError(msg) if start is not None: start = Timedelta(start) @@ -985,7 +986,7 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', Notes ----- - 2 of start, end, or periods must be specified. + Exactly two of start, end, or periods must be specified. To learn more about the frequency strings, please see `this link `__. 
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index da4ca83c10dda..9fd1164a59979 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -107,8 +107,8 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - pytest.raises(ValueError, date_range, start, end, freq='s', - periods=10) + with pytest.raises(ValueError): + date_range(start, end, periods=10, freq='s') def test_date_range_businesshour(self): idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', @@ -146,14 +146,26 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 + with pytest.raises(ValueError): + date_range(start='1/1/2000') + + with pytest.raises(ValueError): + date_range(end='1/1/2000') + + with pytest.raises(ValueError): + date_range(periods=10) + + with pytest.raises(ValueError): + date_range(start='1/1/2000', freq='H') + + with pytest.raises(ValueError): + date_range(end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, '1/1/2000') - pytest.raises(ValueError, date_range, end='1/1/2000') - pytest.raises(ValueError, date_range, periods=10) + with pytest.raises(ValueError): + date_range(periods=10, freq='H') - pytest.raises(ValueError, date_range, '1/1/2000', freq='H') - pytest.raises(ValueError, date_range, end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, periods=10, freq='H') + with pytest.raises(ValueError): + date_range() def test_compat_replace(self): # https://github.com/statsmodels/statsmodels/issues/3349 diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index e5b889e100307..821831ca8c75f 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -440,7 +440,7 @@ def test_constructor_error(self): 
with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) - msg = 'Must specify 2 of start, end, periods' + msg = 'Must specify exactly two of start, end, or periods' with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 0000000000000..6e3e883c3c0f7 --- /dev/null +++ b/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,52 @@ +import pytest +import pandas.util.testing as tm +from pandas import date_range, period_range, PeriodIndex + + +class TestPeriodRange(object): + + @pytest.mark.parametrize('freq', ['D', 'W', 'M', 'Q', 'A']) + def test_construction(self, freq): + # non-empty + expected = date_range(start='2017-01-01', periods=5, + freq=freq, name='foo').to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name='foo') + + result = period_range(start=start, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + with pytest.raises(ValueError): + period_range(start='2017Q1') + + with pytest.raises(ValueError): + period_range(end='2017Q1') + + with pytest.raises(ValueError): + period_range(periods=5) + + with pytest.raises(ValueError): + period_range() + + # too many params + with 
pytest.raises(ValueError): + period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 18eefc3fbdca6..ad495b3251ca1 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -721,40 +721,66 @@ def test_is_non_overlapping_monotonic(self): class TestIntervalRange(object): - def test_construction(self): - result = interval_range(0, 5, name='foo', closed='both') + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction(self, closed): + # combinations of start/end/periods without freq expected = IntervalIndex.from_breaks( - np.arange(0, 5), name='foo', closed='both') + np.arange(0, 6), name='foo', closed=closed) + + result = interval_range(start=0, end=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) - def test_errors(self): + result = interval_range(start=0, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=5, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with freq + expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], + name='foo', closed=closed) + + result = interval_range(start=0, end=6, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=3, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=6, periods=3, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ result = interval_range(start=0, end=7, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + def test_errors(self): # not enough params - def f(): - interval_range(0) + with pytest.raises(ValueError): + interval_range(start=0) - pytest.raises(ValueError, f) + with pytest.raises(ValueError): + interval_range(end=5) - def f(): + with pytest.raises(ValueError): interval_range(periods=2) - pytest.raises(ValueError, f) - - def f(): + with pytest.raises(ValueError): interval_range() - pytest.raises(ValueError, f) + # too many params + with pytest.raises(ValueError): + interval_range(start=0, end=5, periods=6) # mixed units - def f(): - interval_range(0, Timestamp('20130101'), freq=2) - - pytest.raises(ValueError, f) - - def f(): - interval_range(0, 10, freq=Timedelta('1day')) + with pytest.raises(ValueError): + interval_range(start=0, end=Timestamp('20130101'), freq=2) - pytest.raises(ValueError, f) + with pytest.raises(ValueError): + interval_range(start=0, end=10, freq=Timedelta('1day')) class TestIntervalTree(object): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 4732a0ce110de..8ad633ee8f49d 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,5 +1,5 @@ import numpy as np - +import pytest import pandas as pd import pandas.util.testing as tm from pandas.tseries.offsets import Day, Second @@ -49,3 +49,21 @@ def test_timedelta_range(self): expected = df.loc[pd.Timedelta('0s'):, :] result = df.loc['0s':, :] assert_frame_equal(expected, result) + + def test_errors(self): + # not enough params + with pytest.raises(ValueError): + timedelta_range(start='0 days') + + with pytest.raises(ValueError): + timedelta_range(end='5 days') + + with pytest.raises(ValueError): + timedelta_range(periods=2) + + with pytest.raises(ValueError): + timedelta_range() + + # too many params + with 
pytest.raises(ValueError): + timedelta_range(start='0 days', end='5 days', periods=10) From c1735ea5f7eff31b9b08d9137c994cf086b958ed Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 8 Sep 2017 23:35:04 -0600 Subject: [PATCH 2/7] gfyoung review Clean error message, check for error in tests, fix typo in whatsnew. --- doc/source/whatsnew/v0.21.0.txt | 6 +++--- pandas/core/indexes/datetimes.py | 13 +++++++----- pandas/core/indexes/interval.py | 6 ++++-- pandas/core/indexes/period.py | 10 +++++++-- pandas/core/indexes/timedeltas.py | 7 ++++--- .../indexes/datetimes/test_date_range.py | 21 ++++++++++++------- .../tests/indexes/period/test_construction.py | 3 ++- .../tests/indexes/period/test_period_range.py | 12 ++++++----- pandas/tests/indexes/test_interval.py | 18 +++++++++------- .../timedeltas/test_timedelta_range.py | 13 ++++++------ 10 files changed, 67 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6c3df0400c9a3..a411cf0e3000f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -358,7 +358,7 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. -.. _whatsnew_0200.api.consistency_of_range_functions: +.. 
_whatsnew_0210.api.consistency_of_range_functions: Consistency of Range Functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -386,11 +386,11 @@ New Behavior: In [2]: pd.interval_range(start=0, end=4, periods=6) --------------------------------------------------------------------------- - ValueError: Must specify exactly two of start, end, or periods + ValueError: Of the three parameters, start, end, and periods, exactly two must be specified In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') --------------------------------------------------------------------------- - ValueError: Must specify exactly two of start, end, or periods + ValueError: Of the three parameters, start, end, and periods, exactly two must be specified Additionally, the endpoint parameter ``end`` was not included in the intervals produced by ``interval_range``. However, all other range functions include ``end`` in their output. To promote consistency among the range functions, ``interval_range`` will now include ``end`` as the right endpoint of the final interval, except if ``freq`` is specified in a way which skips ``end``. 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c5a84cfdb4d9d..cba70859e2290 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -412,8 +412,8 @@ def __new__(cls, data=None, def _generate(cls, start, end, periods, name, offset, tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: - msg = 'Must specify exactly two of start, end, or periods' - raise ValueError(msg) + raise ValueError('Of the three parameters, start, end, and ' + 'periods, exactly two must be specified') _normalized = True @@ -2031,7 +2031,8 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Notes ----- - Exactly two of start, end, or periods must be specified + Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2074,7 +2075,8 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Notes ----- - Exactly two of start, end, or periods must be specified + Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2128,7 +2130,8 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Notes ----- - Exactly two of start, end, or periods must be specified + Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. 
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 5d84063e649da..bb1ea58fd7e83 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1048,14 +1048,16 @@ def interval_range(start=None, end=None, freq=None, periods=None, Notes ----- - Exactly two of start, end, or periods must be specified + Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + must be specified. Returns ------- rng : IntervalIndex """ if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify exactly two of start, end, or periods') + raise ValueError('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') if freq is None: freq = 1 diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 04c1771138921..25e96afb53b66 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -1052,7 +1052,8 @@ def tz_localize(self, tz, infer_dst=False): def _get_ordinal_range(start, end, periods, freq, mult=1): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify exactly two of start, end, or periods') + raise ValueError('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') if freq is not None: _, mult = _gfc(freq) @@ -1173,7 +1174,8 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): Notes ----- - Exactly two of start, end, or periods must be specified + Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. 
@@ -1182,5 +1184,9 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): ------- prng : PeriodIndex """ + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') + return PeriodIndex(start=start, end=end, periods=periods, freq=freq, name=name) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 38177fabd1e72..59307f51b3061 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -234,8 +234,8 @@ def __new__(cls, data=None, unit=None, @classmethod def _generate(cls, start, end, periods, name, offset, closed=None): if com._count_not_none(start, end, periods) != 2: - msg = 'Must specify exactly two of start, end, or periods' - raise ValueError(msg) + raise ValueError('Of the three parameters, start, end, and ' + 'periods, exactly two must be specified') if start is not None: start = Timedelta(start) @@ -986,7 +986,8 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', Notes ----- - Exactly two of start, end, or periods must be specified. + Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. 
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 9fd1164a59979..2a85979d1c942 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -107,7 +107,9 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - with pytest.raises(ValueError): + msg = ('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): date_range(start, end, periods=10, freq='s') def test_date_range_businesshour(self): @@ -146,25 +148,28 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 - with pytest.raises(ValueError): + msg = ('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): date_range(start='1/1/2000') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): date_range(end='1/1/2000') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): date_range(periods=10) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): date_range(start='1/1/2000', freq='H') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): date_range(end='1/1/2000', freq='H') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): date_range(periods=10, freq='H') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): date_range() def test_compat_replace(self): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 821831ca8c75f..3af0701f9a719 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -440,7 +440,8 @@ def 
test_constructor_error(self): with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) - msg = 'Must specify exactly two of start, end, or periods' + msg = ('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 6e3e883c3c0f7..acfd9fad587ca 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -35,18 +35,20 @@ def test_construction(self, freq): def test_errors(self): # not enough params - with pytest.raises(ValueError): + msg = ('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): period_range(start='2017Q1') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): period_range(end='2017Q1') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): period_range(periods=5) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): period_range() # too many params - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index ad495b3251ca1..7130f3790c9d2 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -759,27 +759,31 @@ def test_construction(self, closed): def test_errors(self): # not enough params - with pytest.raises(ValueError): + msg = ('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): interval_range(start=0) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, 
msg): interval_range(end=5) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): interval_range(periods=2) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): interval_range() # too many params - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=5, periods=6) # mixed units - with pytest.raises(ValueError): + msg = 'start, end, freq need to be the same type' + with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=Timestamp('20130101'), freq=2) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=10, freq=Timedelta('1day')) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 8ad633ee8f49d..8c5642787739d 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,5 +1,4 @@ import numpy as np -import pytest import pandas as pd import pandas.util.testing as tm from pandas.tseries.offsets import Day, Second @@ -52,18 +51,20 @@ def test_timedelta_range(self): def test_errors(self): # not enough params - with pytest.raises(ValueError): + msg = ('Of the three parameters, start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): timedelta_range(start='0 days') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): timedelta_range(end='5 days') - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): timedelta_range(periods=2) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): timedelta_range() # too many params - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): timedelta_range(start='0 days', end='5 days', periods=10) From 93c7005e11270775c53cf872fe73070f67cb586f Mon 
Sep 17 00:00:00 2001 From: jschendel Date: Mon, 11 Sep 2017 00:27:21 -0600 Subject: [PATCH 3/7] jreback review and misc changes addressed jreback's review, cleaned up docstrings, added documentation, update to interval_range endpoint fix, additional tests --- doc/source/timeseries.rst | 9 +++ pandas/core/indexes/datetimes.py | 56 +++++++++---------- pandas/core/indexes/interval.py | 48 ++++++++++------ pandas/core/indexes/period.py | 50 ++++++++++++----- pandas/core/indexes/timedeltas.py | 29 +++++----- .../indexes/datetimes/test_date_range.py | 4 +- .../tests/indexes/period/test_construction.py | 4 +- .../tests/indexes/period/test_period_range.py | 41 +++++++++++++- pandas/tests/indexes/test_interval.py | 20 ++++++- .../timedeltas/test_timedelta_range.py | 2 +- 10 files changed, 178 insertions(+), 85 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c86c58c3183f6..5422d5c53043d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1705,6 +1705,15 @@ has multiplied span. pd.PeriodIndex(start='2014-01', freq='3M', periods=4) +If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor +endpoints for a ``PeriodIndex`` with frequency matching that of the +``PeriodIndex`` constructor. + +.. 
ipython:: python + + pd.PeriodIndex(start=pd.Period('2017Q1', freq='Q'), + end=pd.Period('2017Q2', freq='Q'), freq='M') + Just like ``DatetimeIndex``, a ``PeriodIndex`` can also be used to index pandas objects: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cba70859e2290..d569c84abe946 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -292,8 +292,8 @@ def __new__(cls, data=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise ValueError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -412,7 +412,7 @@ def __new__(cls, data=None, def _generate(cls, start, end, periods, name, offset, tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters, start, end, and ' + raise ValueError('Of the three parameters: start, end, and ' 'periods, exactly two must be specified') _normalized = True @@ -2005,7 +2005,7 @@ def _generate_regular_range(start, end, periods, offset): def date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with day (calendar) as the default + Return a fixed frequency DatetimeIndex, with day (calendar) as the default frequency Parameters @@ -2014,24 +2014,24 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of dates to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency 
strings can have multiples, e.g. '5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Hong_Kong normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. To learn more about the frequency strings, please see `this link @@ -2049,7 +2049,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with business day as the default + Return a fixed frequency DatetimeIndex, with business day as the default frequency Parameters @@ -2058,24 +2058,24 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of dates to generate freq : string or DateOffset, default 'B' (business daily) - Frequency strings can have multiples, e.g. '5H' + Frequency strings can have multiples, e.g. 
'5, default tz : string or None Time zone name for returning localized DatetimeIndex, for example Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. To learn more about the frequency strings, please see `this link @@ -2094,7 +2094,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - **EXPERIMENTAL** Return a fixed frequency datetime index, with + **EXPERIMENTAL** Return a fixed frequency DatetimeIndex, with CustomBusinessDay as the default frequency .. warning:: EXPERIMENTAL @@ -2108,29 +2108,29 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of dates to generate freq : string or DateOffset, default 'C' (CustomBusinessDay) Frequency strings can have multiples, e.g. 
'5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - weekmask : str, Default 'Mon Tue Wed Thu Fri' + name : string, default None + Name of the resulting DatetimeIndex + weekmask : string, Default 'Mon Tue Wed Thu Fri' weekmask of valid business days, passed to ``numpy.busdaycalendar`` holidays : list list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` - closed : string or None, default None + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. 
To learn more about the frequency strings, please see `this link diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index bb1ea58fd7e83..b79d4490efe37 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -15,6 +15,7 @@ is_float_dtype, is_interval_dtype, is_scalar, + is_float, is_integer) from pandas.core.indexes.base import ( Index, _ensure_index, @@ -1028,27 +1029,30 @@ def func(self, other): IntervalIndex._add_logical_methods_disabled() -def interval_range(start=None, end=None, freq=None, periods=None, +def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right', **kwargs): """ Return a fixed frequency IntervalIndex Parameters ---------- - start : string or datetime-like, default None - Left bound for generating data - end : string or datetime-like, default None - Right bound for generating data - freq : integer, string or DateOffset, default 1 + start : numeric, string, or datetime-like, default None + Left bound for generating intervals + end : numeric, string, or datetime-like, default None + Right bound for generating intervals periods : integer, default None - name : str, default None - Name of the resulting index + Number of intervals to generate + freq : numeric, string, or DateOffset, default 1 + The length of each interval. Must be consistent with the + type of start and end + name : string, default None + Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- - Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. 
Returns @@ -1056,20 +1060,30 @@ def interval_range(start=None, end=None, freq=None, periods=None, rng : IntervalIndex """ if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters, start, end, and periods, ' + raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') + # must all be same units or None + arr = np.array(list(com._not_none(start, end, freq))) + if is_object_dtype(arr): + raise ValueError("start, end, freq need to be the same type") + if freq is None: freq = 1 + + if periods is None: + periods = int((end - start) // freq) + elif is_float(periods): + periods = int(periods) + elif not is_integer(periods): + msg = 'periods must be a number, got {periods}' + raise ValueError(msg.format(periods=periods)) + if start is None: start = end - periods * freq - if end is None: - end = start + periods * freq - # must all be same units or None - arr = np.array([start, end, freq]) - if is_object_dtype(arr): - raise ValueError("start, end, freq need to be the same type") + # force end to be consistent with freq (truncate if freq skips over end) + end = start + periods * freq - return IntervalIndex.from_breaks(np.arange(start, end + 1, freq), + return IntervalIndex.from_breaks(np.arange(start, end + freq, freq), name=name, closed=closed, **kwargs) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 25e96afb53b66..ebbb2a7c6c34e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -199,8 +199,8 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise ValueError(msg.format(periods=periods)) if name is None and hasattr(data, 'name'): name = data.name @@ -1052,7 +1052,7 @@ def tz_localize(self, tz, infer_dst=False): 
def _get_ordinal_range(start, end, periods, freq, mult=1): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters, start, end, and periods, ' + raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') if freq is not None: @@ -1067,9 +1067,9 @@ def _get_ordinal_range(start, end, periods, freq, mult=1): is_end_per = isinstance(end, Period) if is_start_per and is_end_per and start.freq != end.freq: - raise ValueError('Start and end must have same freq') + raise ValueError('start and end must have same freq') if (start is tslib.NaT or end is tslib.NaT): - raise ValueError('Start and end must not be NaT') + raise ValueError('start and end must not be NaT') if freq is None: if is_start_per: @@ -1158,23 +1158,25 @@ def pnow(freq=None): def period_range(start=None, end=None, periods=None, freq='D', name=None): """ - Return a fixed frequency datetime index, with day (calendar) as the default + Return a fixed frequency PeriodIndex, with day (calendar) as the default frequency Parameters ---------- - start : starting value, period-like, optional - end : ending value, period-like, optional - periods : int, default None - Number of periods in the index - freq : str/DateOffset, default 'D' + start : string or period-like, default None + Left bound for generating periods + end : string or period-like, default None + Right bound for generating periods + periods : integer, default None + Number of periods to generate + freq : string or DateOffset, default 'D' (calendar daily) Frequency alias - name : str, default None - Name for the resulting PeriodIndex + name : string, default None + Name of the resulting PeriodIndex Notes ----- - Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. 
To learn more about the frequency strings, please see `this link @@ -1183,9 +1185,27 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): Returns ------- prng : PeriodIndex + + Examples + -------- + + >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') + PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', + '2017-06', '2017-07', '2017-08', '2017-09', + '2017-10', '2017-11', '2017-12', '2018-01'], + dtype='period[M]', freq='M') + + If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor + endpoints for a ``PeriodIndex`` with frequency matching that of the + ``period_range`` constructor. + + >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), + ... end=pd.Period('2017Q2', freq='Q'), freq='M') + PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], + dtype='period[M]', freq='M') """ if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters, start, end, and periods, ' + raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') return PeriodIndex(start=start, end=end, periods=periods, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 59307f51b3061..ae7c3c5d09f58 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -180,8 +180,8 @@ def __new__(cls, data=None, unit=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise ValueError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -234,7 +234,7 @@ def __new__(cls, data=None, unit=None, @classmethod def _generate(cls, start, end, periods, name, offset, closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three 
parameters, start, end, and ' + raise ValueError('Of the three parameters: start, end, and ' 'periods, exactly two must be specified') if start is not None: @@ -961,22 +961,22 @@ def _generate_regular_range(start, end, periods, offset): def timedelta_range(start=None, end=None, periods=None, freq='D', name=None, closed=None): """ - Return a fixed frequency timedelta index, with day as the default + Return a fixed frequency TimedeltaIndex, with day as the default frequency Parameters ---------- start : string or timedelta-like, default None - Left bound for generating dates - end : string or datetime-like, default None - Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + Left bound for generating timedeltas + end : string or timedelta-like, default None + Right bound for generating timedeltas + periods : integer, default None + Number of timedeltas to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting TimedeltaIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) @@ -986,12 +986,11 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', Notes ----- - Of the three parameters, ``start``, ``end``, and ``periods``, exactly two + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two must be specified. To learn more about the frequency strings, please see `this link `__. 
""" return TimedeltaIndex(start=start, end=end, periods=periods, - freq=freq, name=name, - closed=closed) + freq=freq, name=name, closed=closed) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 2a85979d1c942..6ca0f7c37d8de 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -107,7 +107,7 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - msg = ('Of the three parameters, start, end, and periods, ' + msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): date_range(start, end, periods=10, freq='s') @@ -148,7 +148,7 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 - msg = ('Of the three parameters, start, end, and periods, ' + msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 3af0701f9a719..639a9272c3808 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -436,11 +436,11 @@ def test_constructor_error(self): start = Period('02-Apr-2005', 'B') end_intv = Period('2006-12-31', ('w', 1)) - msg = 'Start and end must have same freq' + msg = 'start and end must have same freq' with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) - msg = ('Of the three parameters, start, end, and periods, ' + msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) diff --git a/pandas/tests/indexes/period/test_period_range.py 
b/pandas/tests/indexes/period/test_period_range.py index acfd9fad587ca..6c8a9882c8187 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -1,12 +1,12 @@ import pytest import pandas.util.testing as tm -from pandas import date_range, period_range, PeriodIndex +from pandas import date_range, NaT, period_range, Period, PeriodIndex class TestPeriodRange(object): @pytest.mark.parametrize('freq', ['D', 'W', 'M', 'Q', 'A']) - def test_construction(self, freq): + def test_construction_from_string(self, freq): # non-empty expected = date_range(start='2017-01-01', periods=5, freq=freq, name='foo').to_period() @@ -33,9 +33,36 @@ def test_construction(self, freq): result = period_range(start=end, end=start, freq=freq, name='foo') tm.assert_index_equal(result, expected) + def test_construction_from_period(self): + # upsampling + start, end = Period('2017Q1', freq='Q'), Period('2018Q1', freq='Q') + expected = date_range(start='2017-03-31', end='2018-03-31', freq='M', + name='foo').to_period() + result = period_range(start=start, end=end, freq='M', name='foo') + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period('2017-1', freq='M'), Period('2019-12', freq='M') + expected = date_range(start='2017-01-31', end='2019-12-31', freq='Q', + name='foo').to_period() + result = period_range(start=start, end=end, freq='Q', name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq='W', name='foo') + + result = period_range(start=start, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq='W', name='foo') + tm.assert_index_equal(result, expected) + def test_errors(self): # not enough params - msg = ('Of the three parameters, start, end, and periods, ' + msg = ('Of the three 
parameters: start, end, and periods, ' 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): period_range(start='2017Q1') @@ -52,3 +79,11 @@ def test_errors(self): # too many params with tm.assert_raises_regex(ValueError, msg): period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') + + # start/end NaT + msg = 'start and end must not be NaT' + with tm.assert_raises_regex(ValueError, msg): + period_range(start=NaT, end='2018Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end=NaT) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 7130f3790c9d2..2a7e2c6c61c8c 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -753,13 +753,21 @@ def test_construction(self, closed): tm.assert_index_equal(result, expected) # output truncates early if freq causes end to be skipped. - result = interval_range(start=0, end=7, freq=2, name='foo', + expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], + name='foo', closed=closed) + result = interval_range(start=0, end=4, freq=1.5, name='foo', closed=closed) tm.assert_index_equal(result, expected) + def test_constructor_coverage(self): + # float value for periods + expected = pd.interval_range(start=0, periods=10) + result = pd.interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + def test_errors(self): # not enough params - msg = ('Of the three parameters, start, end, and periods, ' + msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): @@ -778,8 +786,16 @@ def test_errors(self): with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=5, periods=6) + # invalid periods + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, periods='foo') + # mixed units msg = 'start, end, freq need 
to be the same type' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq=2) + with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=Timestamp('20130101'), freq=2) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 8c5642787739d..7624e1f79af15 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -51,7 +51,7 @@ def test_timedelta_range(self): def test_errors(self): # not enough params - msg = ('Of the three parameters, start, end, and periods, ' + msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): timedelta_range(start='0 days') From 8ff33dad64e88f2c37a93f13aaa61d95f813649e Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 12 Sep 2017 01:36:17 -0600 Subject: [PATCH 4/7] Review and datetime-like implementation Made review related changes, implemented support for datetime-like input in interval_range. 
--- doc/source/whatsnew/v0.21.0.txt | 4 +- pandas/core/indexes/datetimes.py | 10 +- pandas/core/indexes/interval.py | 128 +++++++++++++---- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 27 +++- .../indexes/datetimes/test_construction.py | 5 +- .../indexes/datetimes/test_date_range.py | 18 ++- .../tests/indexes/period/test_period_range.py | 5 + pandas/tests/indexes/test_interval.py | 133 ++++++++++++++++-- .../indexes/timedeltas/test_construction.py | 5 +- 10 files changed, 286 insertions(+), 51 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a411cf0e3000f..28a8b188898cb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -386,11 +386,11 @@ New Behavior: In [2]: pd.interval_range(start=0, end=4, periods=6) --------------------------------------------------------------------------- - ValueError: Of the three parameters, start, end, and periods, exactly two must be specified + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') --------------------------------------------------------------------------- - ValueError: Of the three parameters, start, end, and periods, exactly two must be specified + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified Additionally, the endpoint parameter ``end`` was not included in the intervals produced by ``interval_range``. However, all other range functions include ``end`` in their output. To promote consistency among the range functions, ``interval_range`` will now include ``end`` as the right endpoint of the final interval, except if ``freq`` is specified in a way which skips ``end``. 
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d569c84abe946..1c8d0b334b91c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -293,7 +293,7 @@ def __new__(cls, data=None, periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' - raise ValueError(msg.format(periods=periods)) + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -2015,7 +2015,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, end : string or datetime-like, default None Right bound for generating dates periods : integer, default None - Number of dates to generate + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' tz : string, default None @@ -2059,9 +2059,9 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, end : string or datetime-like, default None Right bound for generating dates periods : integer, default None - Number of dates to generate + Number of periods to generate freq : string or DateOffset, default 'B' (business daily) - Frequency strings can have multiples, e.g. '5, default + Frequency strings can have multiples, e.g. '5H' tz : string or None Time zone name for returning localized DatetimeIndex, for example Asia/Beijing @@ -2109,7 +2109,7 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, end : string or datetime-like, default None Right bound for generating dates periods : integer, default None - Number of dates to generate + Number of periods to generate freq : string or DateOffset, default 'C' (CustomBusinessDay) Frequency strings can have multiples, e.g. 
'5H' tz : string, default None diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index b79d4490efe37..8d9c83842560b 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -16,6 +16,7 @@ is_interval_dtype, is_scalar, is_float, + is_number, is_integer) from pandas.core.indexes.base import ( Index, _ensure_index, @@ -26,11 +27,14 @@ Interval, IntervalMixin, IntervalTree, intervals_to_interval_bounds) +from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option +from pandas.tseries.offsets import DateOffset +from pandas.tseries.frequencies import to_offset import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -1030,21 +1034,22 @@ def func(self, other): def interval_range(start=None, end=None, periods=None, freq=None, - name=None, closed='right', **kwargs): + name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- - start : numeric, string, or datetime-like, default None + start : numeric or datetime-like, default None Left bound for generating intervals - end : numeric, string, or datetime-like, default None + end : numeric or datetime-like, default None Right bound for generating intervals periods : integer, default None - Number of intervals to generate - freq : numeric, string, or DateOffset, default 1 - The length of each interval. Must be consistent with the - type of start and end + Number of periods to generate + freq : numeric, string, or DateOffset, default None + The length of each interval. Must be consistent with the type of start + and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 + for numeric and 'D' (calendar daily) for datetime-like. 
name : string, default None Name of the resulting IntervalIndex closed : string, default 'right' @@ -1058,32 +1063,107 @@ def interval_range(start=None, end=None, periods=None, freq=None, Returns ------- rng : IntervalIndex + + Examples + -------- + + Numeric ``start`` and ``end`` are supported. + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] + closed='right', dtype='interval[int64]') + + Additionally, datetime-like input is also supported. + + >>> pd.interval_range(start='2017-01-01', end='2017-01-04') + IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], + (2017-01-03, 2017-01-04]] + closed='right', dtype='interval[datetime64[ns]]') + + The ``freq`` parameter specifies the frequency between the left and right + endpoints of the individual intervals within the ``IntervalIndex``. For + numeric ``start`` and ``end``, the frequency must also be numeric. + + >>> pd.interval_range(start=0, periods=4, freq=1.5) + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] + closed='right', dtype='interval[float64]') + + Similarly, for datetime-like ``start`` and ``end``, the frequency must be + convertible to a DateOffset. + + >>> pd.interval_range(start='2017-01-01', periods=3, freq='MS') + IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], + (2017-03-01, 2017-04-01]] + closed='right', dtype='interval[datetime64[ns]]') + + The ``closed`` parameter specifies which endpoints of the individual + intervals within the ``IntervalIndex`` are closed. 
+ + >>> pd.interval_range(end=5, periods=4, closed='both') + IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] + closed='both', dtype='interval[int64]') """ if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - # must all be same units or None - arr = np.array(list(com._not_none(start, end, freq))) - if is_object_dtype(arr): - raise ValueError("start, end, freq need to be the same type") + # assume datetime-like unless we find numeric start or end + is_datetime_interval = True + + if is_number(start): + is_datetime_interval = False + elif start is not None: + try: + start = Timestamp(start) + except (TypeError, ValueError): + raise ValueError('start must be numeric or datetime-like') - if freq is None: - freq = 1 + if is_number(end): + is_datetime_interval = False + elif end is not None: + try: + end = Timestamp(end) + except (TypeError, ValueError): + raise ValueError('end must be numeric or datetime-like') - if periods is None: - periods = int((end - start) // freq) - elif is_float(periods): + if is_float(periods): periods = int(periods) - elif not is_integer(periods): + elif not is_integer(periods) and periods is not None: msg = 'periods must be a number, got {periods}' - raise ValueError(msg.format(periods=periods)) + raise TypeError(msg.format(periods=periods)) - if start is None: - start = end - periods * freq + if is_datetime_interval: + freq = freq or 'D' + if not isinstance(freq, DateOffset): + try: + freq = to_offset(freq) + except ValueError: + raise ValueError('freq must be convertible to DateOffset when ' + 'start/end are datetime-like') + else: + freq = freq or 1 - # force end to be consistent with freq (truncate if freq skips over end) - end = start + periods * freq + # verify type compatibility + is_numeric_interval = all(map(is_number, com._not_none(start, end, freq))) + if not is_datetime_interval and not is_numeric_interval: + raise TypeError("start, 
end, freq need to be type compatible") + + if is_numeric_interval: + if periods is None: + periods = int((end - start) // freq) + + if start is None: + start = end - periods * freq + + # force end to be consistent with freq (lower if freq skips over end) + end = start + periods * freq + + # end + freq for inclusive endpoint + breaks = np.arange(start, end + freq, freq) + else: + # add one to account for interval endpoints (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + breaks = date_range(start=start, end=end, periods=periods, freq=freq) - return IntervalIndex.from_breaks(np.arange(start, end + freq, freq), - name=name, closed=closed, **kwargs) + return IntervalIndex.from_breaks(breaks, name=name, closed=closed) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ebbb2a7c6c34e..fb47d1db48610 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -200,7 +200,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' - raise ValueError(msg.format(periods=periods)) + raise TypeError(msg.format(periods=periods)) if name is None and hasattr(data, 'name'): name = data.name diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ae7c3c5d09f58..d7b7d56d74a3a 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -181,7 +181,7 @@ def __new__(cls, data=None, unit=None, periods = int(periods) elif not is_integer(periods): msg = 'periods must be a number, got {periods}' - raise ValueError(msg.format(periods=periods)) + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -971,7 +971,7 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', end : string or timedelta-like, default None Right bound for generating 
timedeltas periods : integer, default None - Number of timedeltas to generate + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' name : string, default None @@ -991,6 +991,29 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', To learn more about the frequency strings, please see `this link `__. + + Examples + -------- + + >>> pd.timedelta_range(start='1 day', periods=4) + TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``closed`` parameter specifies which endpoint is included. The default + behavior is to include both endpoints. + + >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + TimedeltaIndex(['2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``freq`` parameter specifies the frequency of the TimedeltaIndex. + Only fixed frequencies can be passed, non-fixed frequencies such as + 'M' (month end) will raise. 
+ + >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H') + TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', + '1 days 18:00:00', '2 days 00:00:00'], + dtype='timedelta64[ns]', freq='6H') """ return TimedeltaIndex(start=start, end=end, periods=periods, freq=freq, name=name, closed=closed) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index cf896b06130a2..a4706dd8a3767 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -307,8 +307,9 @@ def test_constructor_coverage(self): exp = date_range('1/1/2000', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + DatetimeIndex(start='1/1/2000', periods='foo', freq='D') pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', end='1/10/2000') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 6ca0f7c37d8de..8d86bebdd4d5e 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -248,8 +248,13 @@ def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) bdate_range(end=START, periods=20, freq=BDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - pytest.raises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + + msg = 'periods must be a number, got B' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'B') + + with tm.assert_raises_regex(TypeError, msg): + bdate_range('2011-1-1', '2012-1-1', 'B') def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) @@ -527,8 +532,13 @@ def test_constructor(self): 
cdate_range(START, END, freq=CDay()) cdate_range(START, periods=20, freq=CDay()) cdate_range(end=START, periods=20, freq=CDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - pytest.raises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + + msg = 'periods must be a number, got C' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'C') + + with tm.assert_raises_regex(TypeError, msg): + cdate_range('2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 6c8a9882c8187..640f24f67f72f 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -87,3 +87,8 @@ def test_errors(self): with tm.assert_raises_regex(ValueError, msg): period_range(start='2017Q1', end=NaT) + + # invalid periods param + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + period_range(start='2017Q1', periods='foo') diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 2a7e2c6c61c8c..de8e920696bf1 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -2,10 +2,10 @@ import pytest import numpy as np - +import datetime from pandas import (Interval, IntervalIndex, Index, isna, interval_range, Timestamp, Timedelta, - compat) + compat, date_range, DateOffset) from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base import pandas.util.testing as tm @@ -722,7 +722,7 @@ def test_is_non_overlapping_monotonic(self): class TestIntervalRange(object): @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) - def test_construction(self, closed): + def test_construction_from_numeric(self, closed): # combinations of start/end/periods without freq 
expected = IntervalIndex.from_breaks( np.arange(0, 6), name='foo', closed=closed) @@ -759,12 +759,108 @@ def test_construction(self, closed): closed=closed) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timestamp(self, closed): + # combinations of start/end/periods without freq + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06') + expected = IntervalIndex.from_breaks(date_range(start=start, end=end), + name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ end = Timestamp('2017-01-08') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with non-fixed freq + freq = 'M' + start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timestamp('2018-01-15') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + def test_constructor_coverage(self): # float value for periods expected = pd.interval_range(start=0, periods=10) result = pd.interval_range(start=0, periods=10.5) tm.assert_index_equal(result, expected) + # equivalent datetime-like start/end + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pydatetime(), + end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.date(), end=end.date()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=str(start), end=str(end)) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.strftime('%Y-%m-%d'), + end=end.strftime('%Y-%m-%d')) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.strftime('%m/%d/%y'), + end=end.strftime('%m/%d/%y')) + 
tm.assert_index_equal(result, expected) + + # equivalent freq + equiv_freq = ['D', DateOffset(days=1), Timedelta(days=1), + datetime.timedelta(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + def test_errors(self): # not enough params msg = ('Of the three parameters: start, end, and periods, ' @@ -786,21 +882,40 @@ def test_errors(self): with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=5, periods=6) + # mixed units + msg = 'start, end, freq need to be type compatible' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timestamp('20130101'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=10, freq=Timedelta('1day')) + # invalid periods msg = 'periods must be a number, got foo' - with tm.assert_raises_regex(ValueError, msg): + with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, periods='foo') - # mixed units - msg = 'start, end, freq need to be the same type' + # invalid start + msg = 'start must be numeric or datetime-like' with tm.assert_raises_regex(ValueError, msg): - interval_range(start=Timestamp('20130101'), end=10, freq=2) + interval_range(start='foo', periods=10) + # invalid end + msg = 'end must be numeric or datetime-like' with tm.assert_raises_regex(ValueError, msg): - interval_range(start=0, end=Timestamp('20130101'), freq=2) + interval_range(end=Interval(0, 1), periods=10) + # invalid freq for datetime-like + msg = ('freq must be convertible to DateOffset when start/end are ' + 'datetime-like') with tm.assert_raises_regex(ValueError, msg): - interval_range(start=0, end=10, freq=Timedelta('1day')) + interval_range(start=Timestamp('20130101'), periods=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + 
interval_range(end=Timestamp('20130101'), periods=10, freq='foo') class TestIntervalTree(object): diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index dd25e2cca2e55..70aadd9f57174 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -50,8 +50,9 @@ def test_constructor_coverage(self): exp = timedelta_range('1 days', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, TimedeltaIndex, start='1 days', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + TimedeltaIndex(start='1 days', periods='foo', freq='D') pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') From 8b73df969c877a2a62e08f6a86ebd4fdef11fc69 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 12 Sep 2017 16:36:33 -0600 Subject: [PATCH 5/7] fix datetimelike --- pandas/core/indexes/interval.py | 62 +++++++------ pandas/tests/indexes/test_interval.py | 122 +++++++++++++++++++++----- 2 files changed, 136 insertions(+), 48 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 8d9c83842560b..eb77edec917bc 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -28,12 +28,12 @@ intervals_to_interval_bounds) from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option -from pandas.tseries.offsets import DateOffset from pandas.tseries.frequencies import to_offset import pandas.core.indexes.base as ibase @@ -1107,24 +1107,29 @@ def interval_range(start=None, end=None, periods=None, freq=None, raise 
ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - # assume datetime-like unless we find numeric start or end - is_datetime_interval = True + iv_type = {'numeric': True, 'timestamp': True, 'timedelta': True} + start = com._maybe_box_datetimelike(start) if is_number(start): - is_datetime_interval = False + iv_type.update({k: False for k in iv_type if k != 'numeric'}) + elif isinstance(start, Timestamp): + iv_type.update({k: False for k in iv_type if k != 'timestamp'}) + elif isinstance(start, Timedelta): + iv_type.update({k: False for k in iv_type if k != 'timedelta'}) elif start is not None: - try: - start = Timestamp(start) - except (TypeError, ValueError): - raise ValueError('start must be numeric or datetime-like') + msg = 'start must be numeric or datetime-like, got {start}' + raise ValueError(msg.format(start=start)) + end = com._maybe_box_datetimelike(end) if is_number(end): - is_datetime_interval = False + iv_type.update({k: False for k in iv_type if k != 'numeric'}) + elif isinstance(end, Timestamp): + iv_type.update({k: False for k in iv_type if k != 'timestamp'}) + elif isinstance(end, Timedelta): + iv_type.update({k: False for k in iv_type if k != 'timedelta'}) elif end is not None: - try: - end = Timestamp(end) - except (TypeError, ValueError): - raise ValueError('end must be numeric or datetime-like') + msg = 'end must be numeric or datetime-like, got {end}' + raise ValueError(msg.format(end=end)) if is_float(periods): periods = int(periods) @@ -1132,23 +1137,22 @@ def interval_range(start=None, end=None, periods=None, freq=None, msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) - if is_datetime_interval: - freq = freq or 'D' - if not isinstance(freq, DateOffset): - try: - freq = to_offset(freq) - except ValueError: - raise ValueError('freq must be convertible to DateOffset when ' - 'start/end are datetime-like') + freq = freq or (1 if iv_type['numeric'] else 
'D') + if is_number(freq): + iv_type.update({k: False for k in iv_type if k != 'numeric'}) else: - freq = freq or 1 + try: + freq = to_offset(freq) + iv_type['numeric'] = False + except ValueError: + raise ValueError('freq must be numeric or convertible to ' + 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility - is_numeric_interval = all(map(is_number, com._not_none(start, end, freq))) - if not is_datetime_interval and not is_numeric_interval: + if not any(iv_type.values()): raise TypeError("start, end, freq need to be type compatible") - if is_numeric_interval: + if iv_type['numeric']: if periods is None: periods = int((end - start) // freq) @@ -1160,10 +1164,16 @@ def interval_range(start=None, end=None, periods=None, freq=None, # end + freq for inclusive endpoint breaks = np.arange(start, end + freq, freq) - else: + elif iv_type['timestamp']: # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 breaks = date_range(start=start, end=end, periods=periods, freq=freq) + else: + # add one to account for interval endpoints (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + breaks = timedelta_range(start=start, end=end, periods=periods, + freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index de8e920696bf1..13c3b35e4d85d 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -2,10 +2,11 @@ import pytest import numpy as np -import datetime +from datetime import timedelta from pandas import (Interval, IntervalIndex, Index, isna, interval_range, Timestamp, Timedelta, - compat, date_range, DateOffset) + compat, date_range, timedelta_range, DateOffset) +from pandas.tseries.offsets import Day from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base import pandas.util.testing as tm @@ 
-763,8 +764,8 @@ def test_construction_from_numeric(self, closed): def test_construction_from_timestamp(self, closed): # combinations of start/end/periods without freq start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06') - expected = IntervalIndex.from_breaks(date_range(start=start, end=end), - name='foo', closed=closed) + breaks = date_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) result = interval_range(start=start, end=end, name='foo', closed=closed) @@ -826,13 +827,56 @@ def test_construction_from_timestamp(self, closed): closed=closed) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timedelta(self, closed): + # combinations of start/end/periods without freq + start, end = Timedelta('1 day'), Timedelta('6 days') + breaks = timedelta_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timedelta('1 day'), Timedelta('7 days') + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + 
tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timedelta('7 days 1 hour') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + def test_constructor_coverage(self): # float value for periods expected = pd.interval_range(start=0, periods=10) result = pd.interval_range(start=0, periods=10.5) tm.assert_index_equal(result, expected) - # equivalent datetime-like start/end + # equivalent timestamp-like start/end start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') expected = pd.interval_range(start=start, end=end) @@ -840,23 +884,33 @@ def test_constructor_coverage(self): end=end.to_pydatetime()) tm.assert_index_equal(result, expected) - result = pd.interval_range(start=start.date(), end=end.date()) + result = pd.interval_range(start=start.tz_localize('UTC'), + end=end.tz_localize('UTC')) tm.assert_index_equal(result, expected) - result = pd.interval_range(start=str(start), end=str(end)) + result = pd.interval_range(start=start.asm8, end=end.asm8) tm.assert_index_equal(result, expected) - result = pd.interval_range(start=start.strftime('%Y-%m-%d'), - end=end.strftime('%Y-%m-%d')) + # equivalent freq with timestamp + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), + DateOffset(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pytimedelta(), + end=end.to_pytimedelta()) tm.assert_index_equal(result, expected) - result = pd.interval_range(start=start.strftime('%m/%d/%y'), - end=end.strftime('%m/%d/%y')) + result = pd.interval_range(start=start.asm8, end=end.asm8) tm.assert_index_equal(result, expected) - # equivalent freq - 
equiv_freq = ['D', DateOffset(days=1), Timedelta(days=1), - datetime.timedelta(days=1)] + # equivalent freq with timedelta + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] for freq in equiv_freq: result = pd.interval_range(start=start, end=end, freq=freq) tm.assert_index_equal(result, expected) @@ -885,13 +939,35 @@ def test_errors(self): # mixed units msg = 'start, end, freq need to be type compatible' with tm.assert_raises_regex(TypeError, msg): - interval_range(start=Timestamp('20130101'), end=10, freq=2) + interval_range(start=0, end=Timestamp('20130101'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timedelta('1 day'), freq=2) with tm.assert_raises_regex(TypeError, msg): - interval_range(start=0, end=Timestamp('20130101'), freq=2) + interval_range(start=0, end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timedelta('1 day'), freq='D') with tm.assert_raises_regex(TypeError, msg): - interval_range(start=0, end=10, freq=Timedelta('1day')) + interval_range(start=Timestamp('20130101'), + end=Timestamp('20130110'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timestamp('20130110'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timedelta('10 days'), freq=2) # invalid periods msg = 'periods must be a number, got foo' @@ -899,23 +975,25 @@ def test_errors(self): interval_range(start=0, periods='foo') # invalid start - msg = 'start must be numeric or datetime-like' + msg = 'start must be numeric or datetime-like, got foo' with tm.assert_raises_regex(ValueError, msg): interval_range(start='foo', 
periods=10) # invalid end - msg = 'end must be numeric or datetime-like' + msg = 'end must be numeric or datetime-like, got \(0, 1\]' with tm.assert_raises_regex(ValueError, msg): interval_range(end=Interval(0, 1), periods=10) # invalid freq for datetime-like - msg = ('freq must be convertible to DateOffset when start/end are ' - 'datetime-like') + msg = 'freq must be numeric or convertible to DateOffset, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=10, freq='foo') + with tm.assert_raises_regex(ValueError, msg): interval_range(start=Timestamp('20130101'), periods=10, freq='foo') with tm.assert_raises_regex(ValueError, msg): - interval_range(end=Timestamp('20130101'), periods=10, freq='foo') + interval_range(end=Timedelta('1 day'), periods=10, freq='foo') class TestIntervalTree(object): From 37c24bc99ca16aee6e67469c719e9a38684507a9 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 12 Sep 2017 23:49:49 -0600 Subject: [PATCH 6/7] clean interval_range --- pandas/core/indexes/interval.py | 56 ++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index eb77edec917bc..6e80f6c900386 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -35,6 +35,7 @@ from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import DateOffset import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -1033,6 +1034,24 @@ def func(self, other): IntervalIndex._add_logical_methods_disabled() +def _is_valid_endpoint(endpoint): + """helper for interval_range to check if start/end are valid types""" + return any([is_number(endpoint), + isinstance(endpoint, Timestamp), + isinstance(endpoint, Timedelta), + endpoint is None]) + + +def _is_type_compatible(a, b): + 
"""helper for interval_range to check type compat of start/end/freq""" + is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) + is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) + return ((is_number(a) and is_number(b)) or + (is_ts_compat(a) and is_ts_compat(b)) or + (is_td_compat(a) and is_td_compat(b)) or + com._any_none(a, b)) + + def interval_range(start=None, end=None, periods=None, freq=None, name=None, closed='right'): """ @@ -1107,27 +1126,15 @@ def interval_range(start=None, end=None, periods=None, freq=None, raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - iv_type = {'numeric': True, 'timestamp': True, 'timedelta': True} - start = com._maybe_box_datetimelike(start) - if is_number(start): - iv_type.update({k: False for k in iv_type if k != 'numeric'}) - elif isinstance(start, Timestamp): - iv_type.update({k: False for k in iv_type if k != 'timestamp'}) - elif isinstance(start, Timedelta): - iv_type.update({k: False for k in iv_type if k != 'timedelta'}) - elif start is not None: + end = com._maybe_box_datetimelike(end) + endpoint = next(com._not_none(start, end)) + + if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) - end = com._maybe_box_datetimelike(end) - if is_number(end): - iv_type.update({k: False for k in iv_type if k != 'numeric'}) - elif isinstance(end, Timestamp): - iv_type.update({k: False for k in iv_type if k != 'timestamp'}) - elif isinstance(end, Timedelta): - iv_type.update({k: False for k in iv_type if k != 'timedelta'}) - elif end is not None: + if not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) @@ -1137,22 +1144,21 @@ def interval_range(start=None, end=None, periods=None, freq=None, msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) - freq = freq or (1 if 
iv_type['numeric'] else 'D') - if is_number(freq): - iv_type.update({k: False for k in iv_type if k != 'numeric'}) - else: + freq = freq or (1 if is_number(endpoint) else 'D') + if not is_number(freq): try: freq = to_offset(freq) - iv_type['numeric'] = False except ValueError: raise ValueError('freq must be numeric or convertible to ' 'DateOffset, got {freq}'.format(freq=freq)) # verify type compatibility - if not any(iv_type.values()): + if not all([_is_type_compatible(start, end), + _is_type_compatible(start, freq), + _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") - if iv_type['numeric']: + if is_number(endpoint): if periods is None: periods = int((end - start) // freq) @@ -1164,7 +1170,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, # end + freq for inclusive endpoint breaks = np.arange(start, end + freq, freq) - elif iv_type['timestamp']: + elif isinstance(endpoint, Timestamp): # add one to account for interval endpoints (n breaks = n-1 intervals) if periods is not None: periods += 1 From f6cc86053ed1cdfaaaa0b5b23be259f087acc54e Mon Sep 17 00:00:00 2001 From: jschendel Date: Wed, 13 Sep 2017 23:22:15 -0600 Subject: [PATCH 7/7] minor doc changes --- doc/source/api.rst | 9 +++++++++ doc/source/whatsnew/v0.21.0.txt | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 27a4ab9cc6cbc..1541bbccefe21 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -218,10 +218,19 @@ Top-level dealing with datetimelike to_timedelta date_range bdate_range + cdate_range period_range timedelta_range infer_freq +Top-level dealing with intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
autosummary:: + :toctree: generated/ + + interval_range + Top-level evaluation ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 28a8b188898cb..939199d3f6fa6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -218,7 +218,7 @@ Furthermore this will now correctly box the results of iteration for :func:`Data .. ipython:: ipython d = {'a':[1], 'b':['b']} - df = pd,DataFrame(d) + df = pd.DataFrame(d) Previously: @@ -363,7 +363,7 @@ Additionally, DataFrames with datetime columns that were parsed by :func:`read_s Consistency of Range Functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In previous versions, there were some inconsistencies between the various range functions: ``date_range``, ``bdate_range``, ``cdate_range``, ``interval_range``, ``period_range``, and ``timedelta_range``. (:issue:`17471`). +In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`cdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`. (:issue:`17471`). One of the inconsistent behaviors occurred when the ``start``, ``end`` and ``period`` parameters were all specified, potentially leading to ambiguous ranges. When all three parameters were passed, ``interval_range`` ignored the ``period`` parameter, ``period_range`` ignored the ``end`` parameter, and the other range functions raised. To promote consistency among the range functions, and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range`` will now raise when all three parameters are passed.