From a220176476315b91cb6e2627861b5a00c6d96651 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 10 May 2018 22:55:30 -0600 Subject: [PATCH] ENH: Implement linspace behavior for timedelta_range/interval_range --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/core/indexes/datetimes.py | 40 ++- pandas/core/indexes/interval.py | 85 +++--- pandas/core/indexes/timedeltas.py | 46 ++- .../indexes/datetimes/test_date_range.py | 4 + .../indexes/interval/test_interval_range.py | 262 +++++++++--------- .../timedeltas/test_timedelta_range.py | 18 +- 7 files changed, 257 insertions(+), 200 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 3f7bc0e8d8c3f..a8a201558ec9b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -526,7 +526,7 @@ Other Enhancements - Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) - :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) - :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`) -- :func:`date_range` now returns a linearly spaced ``DatetimeIndex`` if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`) +- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``end``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`) .. 
_whatsnew_0230.api_breaking: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1d5c2d9a098ed..9761974d77d4b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -358,11 +358,6 @@ def __new__(cls, data=None, msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) - if data is None and freq is None \ - and com._any_none(periods, start, end): - raise ValueError("Must provide freq argument if no data is " - "supplied") - # if dtype has an embedded tz, capture it if dtype is not None: try: @@ -377,9 +372,13 @@ def __new__(cls, data=None, pass if data is None: - return cls._generate(start, end, periods, name, freq, - tz=tz, normalize=normalize, closed=closed, - ambiguous=ambiguous) + if freq is None and com._any_none(periods, start, end): + msg = 'Must provide freq argument if no data is supplied' + raise ValueError(msg) + else: + return cls._generate(start, end, periods, name, freq, tz=tz, + normalize=normalize, closed=closed, + ambiguous=ambiguous) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): @@ -2590,11 +2589,6 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, """ Return a fixed frequency DatetimeIndex. - Of the three parameters `start`, `end`, `periods`, and `freq` exactly - three must be specified. If `freq` is omitted, the resulting DatetimeIndex - will have `periods` linearly spaced elements between `start` and `end` - (closed on both sides). - Parameters ---------- start : str or datetime-like, optional @@ -2628,9 +2622,20 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, See Also -------- pandas.DatetimeIndex : An immutable container for datetimes. + pandas.timedelta_range : Return a fixed frequency TimedeltaIndex. pandas.period_range : Return a fixed frequency PeriodIndex. pandas.interval_range : Return a fixed frequency IntervalIndex. 
+ Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``DatetimeIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. + Examples -------- **Specifying the values** @@ -2769,8 +2774,10 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Notes ----- - Of the three parameters: ``start``, ``end``, and ``periods``, exactly two - must be specified. + Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. Specifying ``freq`` is a requirement + for ``bdate_range``. Use ``date_range`` if specifying ``freq`` is not + desired. To learn more about the frequency strings, please see `this link <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. @@ -2779,6 +2786,9 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, ------- rng : DatetimeIndex """ + if freq is None: + msg = 'freq must be specified for bdate_range; use date_range instead' + raise TypeError(msg) if is_string_like(freq) and freq.startswith('C'): try: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 766ac7b14120e..408a8cc435b63 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -6,7 +6,8 @@ from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.generic import ABCDatetimeIndex, ABCPeriodIndex from pandas.core.dtypes.dtypes import IntervalDtype -from pandas.core.dtypes.cast import maybe_convert_platform, find_common_type +from pandas.core.dtypes.cast import ( + maybe_convert_platform, find_common_type, maybe_downcast_to_dtype) from pandas.core.dtypes.common import ( _ensure_platform_int, is_list_like, @@ -1465,8 +1466,13 @@ def interval_range(start=None, end=None, periods=None, freq=None, Notes ----- - Of the three parameters: ``start``, ``end``, 
and ``periods``, exactly two - must be specified. + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``IntervalIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end``, inclusively. + + To learn more about datetime-like frequency strings, please see `this link + <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. Returns ------- @@ -1505,6 +1511,14 @@ def interval_range(start=None, end=None, periods=None, freq=None, (2017-03-01, 2017-04-01]] closed='right', dtype='interval[datetime64[ns]]') + Specify ``start``, ``end``, and ``periods``; the frequency is generated + automatically (linearly spaced). + + >>> pd.interval_range(start=0, end=6, periods=4) + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] + closed='right', + dtype='interval[float64]') + The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. @@ -1516,19 +1530,21 @@ def interval_range(start=None, end=None, periods=None, freq=None, -------- IntervalIndex : an Index of intervals that are all closed on the same side. 
""" - if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters: start, end, and periods, ' - 'exactly two must be specified') - start = com._maybe_box_datetimelike(start) end = com._maybe_box_datetimelike(end) - endpoint = next(com._not_none(start, end)) + endpoint = start if start is not None else end + + if freq is None and com._any_none(periods, start, end): + freq = 1 if is_number(endpoint) else 'D' + + if com._count_not_none(start, end, periods, freq) != 3: + raise ValueError('Of the four parameters: start, end, periods, and ' + 'freq, exactly three must be specified') if not _is_valid_endpoint(start): msg = 'start must be numeric or datetime-like, got {start}' raise ValueError(msg.format(start=start)) - - if not _is_valid_endpoint(end): + elif not _is_valid_endpoint(end): msg = 'end must be numeric or datetime-like, got {end}' raise ValueError(msg.format(end=end)) @@ -1538,8 +1554,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) - freq = freq or (1 if is_number(endpoint) else 'D') - if not is_number(freq): + if freq is not None and not is_number(freq): try: freq = to_offset(freq) except ValueError: @@ -1552,28 +1567,34 @@ def interval_range(start=None, end=None, periods=None, freq=None, _is_type_compatible(end, freq)]): raise TypeError("start, end, freq need to be type compatible") + # +1 to convert interval count to breaks count (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + if is_number(endpoint): + # compute the period/start/end if unspecified (at most one) if periods is None: - periods = int((end - start) // freq) - - if start is None: - start = end - periods * freq - - # force end to be consistent with freq (lower if freq skips over end) - end = start + periods * freq - - # end + freq for inclusive endpoint - breaks = np.arange(start, end + freq, freq) - elif isinstance(endpoint, 
Timestamp): - # add one to account for interval endpoints (n breaks = n-1 intervals) - if periods is not None: - periods += 1 - breaks = date_range(start=start, end=end, periods=periods, freq=freq) + periods = int((end - start) // freq) + 1 + elif start is None: + start = end - (periods - 1) * freq + elif end is None: + end = start + (periods - 1) * freq + + # force end to be consistent with freq (lower if freq skips end) + if freq is not None: + end -= end % freq + + breaks = np.linspace(start, end, periods) + if all(is_integer(x) for x in com._not_none(start, end, freq)): + # np.linspace always produces float output + breaks = maybe_downcast_to_dtype(breaks, 'int64') else: - # add one to account for interval endpoints (n breaks = n-1 intervals) - if periods is not None: - periods += 1 - breaks = timedelta_range(start=start, end=end, periods=periods, - freq=freq) + # delegate to the appropriate range function + if isinstance(endpoint, Timestamp): + range_func = date_range + else: + range_func = timedelta_range + + breaks = range_func(start=start, end=end, periods=periods, freq=freq) return IntervalIndex.from_breaks(breaks, name=name, closed=closed) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 6b278fc35c831..9707d19953418 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -225,13 +225,13 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, msg = 'periods must be a number, got {periods}' raise TypeError(msg.format(periods=periods)) - if data is None and freq is None: - raise ValueError("Must provide freq argument if no data is " - "supplied") - if data is None: - return cls._generate(start, end, periods, name, freq, - closed=closed) + if freq is None and com._any_none(periods, start, end): + msg = 'Must provide freq argument if no data is supplied' + raise ValueError(msg) + else: + return cls._generate(start, end, periods, name, freq, + closed=closed) if unit is 
not None: data = to_timedelta(data, unit=unit, box=False) @@ -266,10 +266,10 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, return cls._simple_new(data, name=name, freq=freq) @classmethod - def _generate(cls, start, end, periods, name, offset, closed=None): - if com._count_not_none(start, end, periods) != 2: - raise ValueError('Of the three parameters: start, end, and ' - 'periods, exactly two must be specified') + def _generate(cls, start, end, periods, name, freq, closed=None): + if com._count_not_none(start, end, periods, freq) != 3: + raise ValueError('Of the four parameters: start, end, periods, ' + 'and freq, exactly three must be specified') if start is not None: start = Timedelta(start) @@ -295,8 +295,11 @@ def _generate(cls, start, end, periods, name, offset, closed=None): else: raise ValueError("Closed has to be either 'left', 'right' or None") - index = _generate_regular_range(start, end, periods, offset) - index = cls._simple_new(index, name=name, freq=offset) + if freq is not None: + index = _generate_regular_range(start, end, periods, freq) + index = cls._simple_new(index, name=name, freq=freq) + else: + index = to_timedelta(np.linspace(start.value, end.value, periods)) if not left_closed: index = index[1:] @@ -1046,7 +1049,7 @@ def _generate_regular_range(start, end, periods, offset): return data -def timedelta_range(start=None, end=None, periods=None, freq='D', +def timedelta_range(start=None, end=None, periods=None, freq=None, name=None, closed=None): """ Return a fixed frequency TimedeltaIndex, with day as the default @@ -1074,8 +1077,10 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', Notes ----- - Of the three parameters: ``start``, ``end``, and ``periods``, exactly two - must be specified. + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. 
If ``freq`` is omitted, the resulting + ``TimedeltaIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). To learn more about the frequency strings, please see `this link <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. @@ -1102,6 +1107,17 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', '1 days 18:00:00', '2 days 00:00:00'], dtype='timedelta64[ns]', freq='6H') + + Specify ``start``, ``end``, and ``periods``; the frequency is generated + automatically (linearly spaced). + + >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) + TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', + '5 days 00:00:00'], + dtype='timedelta64[ns]', freq=None) """ + if freq is None and com._any_none(periods, start, end): + freq = 'D' + return TimedeltaIndex(start=start, end=end, periods=periods, freq=freq, name=name, closed=closed) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 3fb088329f225..193804b66395b 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -361,6 +361,10 @@ def test_constructor(self): with tm.assert_raises_regex(TypeError, msg): bdate_range('2011-1-1', '2012-1-1', 'B') + msg = 'freq must be specified for bdate_range; use date_range instead' + with tm.assert_raises_regex(TypeError, msg): + bdate_range(START, END, periods=10, freq=None) + def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index 203e8e3128edc..0fadfcf0c7f28 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -6,9 +6,9 @@ from pandas 
import ( Interval, IntervalIndex, Timestamp, Timedelta, DateOffset, interval_range, date_range, timedelta_range) +from pandas.core.dtypes.common import is_integer from pandas.tseries.offsets import Day import pandas.util.testing as tm -import pandas as pd @pytest.fixture(scope='class', params=['left', 'right', 'both', 'neither']) @@ -23,200 +23,198 @@ def name(request): class TestIntervalRange(object): - def test_construction_from_numeric(self, closed, name): - # combinations of start/end/periods without freq - expected = IntervalIndex.from_breaks( - np.arange(0, 6), name=name, closed=closed) - - result = interval_range(start=0, end=5, name=name, closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=0, periods=5, name=name, closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=5, periods=5, name=name, closed=closed) - tm.assert_index_equal(result, expected) - - # combinations of start/end/periods with freq - expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], - name=name, closed=closed) - - result = interval_range(start=0, end=6, freq=2, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=0, periods=3, freq=2, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=6, periods=3, freq=2, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # output truncates early if freq causes end to be skipped. 
- expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], - name=name, closed=closed) - result = interval_range(start=0, end=4, freq=1.5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('tz', [None, 'US/Eastern']) - def test_construction_from_timestamp(self, closed, name, tz): - # combinations of start/end/periods without freq - start = Timestamp('2017-01-01', tz=tz) - end = Timestamp('2017-01-06', tz=tz) - breaks = date_range(start=start, end=end) - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - - result = interval_range(start=start, end=end, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=start, periods=5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(end=end, periods=5, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - # combinations of start/end/periods with fixed freq - freq = '2D' - start = Timestamp('2017-01-01', tz=tz) - end = Timestamp('2017-01-07', tz=tz) - breaks = date_range(start=start, end=end, freq=freq) + @pytest.mark.parametrize('freq, periods', [ + (1, 100), (2.5, 40), (5, 20), (25, 4)]) + def test_constructor_numeric(self, closed, name, freq, periods): + start, end = 0, 100 + breaks = np.arange(101, step=freq) expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - result = interval_range(start=start, periods=3, freq=freq, name=name, - closed=closed) + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - result = interval_range(end=end, periods=3, freq=freq, 
name=name, - closed=closed) + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - # output truncates early if freq causes end to be skipped. - end = Timestamp('2017-01-08', tz=tz) - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed) tm.assert_index_equal(result, expected) - # combinations of start/end/periods with non-fixed freq - freq = 'M' - start = Timestamp('2017-01-01', tz=tz) - end = Timestamp('2017-12-31', tz=tz) + @pytest.mark.parametrize('tz', [None, 'US/Eastern']) + @pytest.mark.parametrize('freq, periods', [ + ('D', 364), ('2D', 182), ('22D18H', 16), ('M', 11)]) + def test_constructor_timestamp(self, closed, name, freq, periods, tz): + start, end = Timestamp('20180101', tz=tz), Timestamp('20181231', tz=tz) breaks = date_range(start=start, end=end, freq=freq) expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - result = interval_range(start=start, periods=11, freq=freq, name=name, - closed=closed) + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - result = interval_range(end=end, periods=11, freq=freq, name=name, - closed=closed) + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - # output truncates early if freq causes end to be skipped. 
- end = Timestamp('2018-01-15', tz=tz) - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) + # GH 20976: linspace behavior defined from start/end/periods + if not breaks.freq.isAnchored() and tz is None: + # matches expected only for non-anchored offsets and tz naive + # (anchored/DST transitions cause unequal spacing in expected) + result = interval_range(start=start, end=end, periods=periods, + name=name, closed=closed) + tm.assert_index_equal(result, expected) - def test_construction_from_timedelta(self, closed, name): - # combinations of start/end/periods without freq - start, end = Timedelta('1 day'), Timedelta('6 days') - breaks = timedelta_range(start=start, end=end) + @pytest.mark.parametrize('freq, periods', [ + ('D', 100), ('2D12H', 40), ('5D', 20), ('25D', 4)]) + def test_constructor_timedelta(self, closed, name, freq, periods): + start, end = Timedelta('0 days'), Timedelta('100 days') + breaks = timedelta_range(start=start, end=end, freq=freq) expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - result = interval_range(start=start, end=end, name=name, - closed=closed) + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - result = interval_range(start=start, periods=5, name=name, - closed=closed) + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - result = interval_range(end=end, periods=5, name=name, - closed=closed) + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) - # combinations of start/end/periods with fixed freq - freq = '2D' - start, end = Timedelta('1 day'), Timedelta('7 days') - breaks = 
timedelta_range(start=start, end=end, freq=freq) - expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) - - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) - - result = interval_range(start=start, periods=3, freq=freq, name=name, - closed=closed) + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed) tm.assert_index_equal(result, expected) - result = interval_range(end=end, periods=3, freq=freq, name=name, - closed=closed) + @pytest.mark.parametrize('start, end, freq, expected_endpoint', [ + (0, 10, 3, 9), + (Timedelta('0D'), Timedelta('10D'), '2D4H', Timedelta('8D16H')), + (Timestamp('2018-01-01'), + Timestamp('2018-02-09'), + 'MS', + Timestamp('2018-02-01')), + (Timestamp('2018-01-01', tz='US/Eastern'), + Timestamp('2018-01-20', tz='US/Eastern'), + '5D12H', + Timestamp('2018-01-17 12:00:00', tz='US/Eastern'))]) + def test_early_truncation(self, start, end, freq, expected_endpoint): + # index truncates early if freq causes end to be skipped + result = interval_range(start=start, end=end, freq=freq) + result_endpoint = result.right[-1] + assert result_endpoint == expected_endpoint + + @pytest.mark.parametrize('start, mid, end', [ + (Timestamp('2018-03-10', tz='US/Eastern'), + Timestamp('2018-03-10 23:30:00', tz='US/Eastern'), + Timestamp('2018-03-12', tz='US/Eastern')), + (Timestamp('2018-11-03', tz='US/Eastern'), + Timestamp('2018-11-04 00:30:00', tz='US/Eastern'), + Timestamp('2018-11-05', tz='US/Eastern'))]) + def test_linspace_dst_transition(self, start, mid, end): + # GH 20976: linspace behavior defined from start/end/periods + # accounts for the hour gained/lost during DST transition + result = interval_range(start=start, end=end, periods=2) + expected = IntervalIndex.from_breaks([start, mid, end]) tm.assert_index_equal(result, expected) - # output truncates early 
if freq causes end to be skipped. - end = Timedelta('7 days 1 hour') - result = interval_range(start=start, end=end, freq=freq, name=name, - closed=closed) - tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('freq', [2, 2.0]) + @pytest.mark.parametrize('end', [10, 10.0]) + @pytest.mark.parametrize('start', [0, 0.0]) + def test_float_subtype(self, start, end, freq): + # Has float subtype if any of start/end/freq are float, even if all + # resulting endpoints can safely be upcast to integers + + # defined from start/end/freq + index = interval_range(start=start, end=end, freq=freq) + result = index.dtype.subtype + expected = 'int64' if is_integer(start + end + freq) else 'float64' + assert result == expected + + # defined from start/periods/freq + index = interval_range(start=start, periods=5, freq=freq) + result = index.dtype.subtype + expected = 'int64' if is_integer(start + freq) else 'float64' + assert result == expected + + # defined from end/periods/freq + index = interval_range(end=end, periods=5, freq=freq) + result = index.dtype.subtype + expected = 'int64' if is_integer(end + freq) else 'float64' + assert result == expected + + # GH 20976: linspace behavior defined from start/end/periods + index = interval_range(start=start, end=end, periods=5) + result = index.dtype.subtype + expected = 'int64' if is_integer(start + end) else 'float64' + assert result == expected def test_constructor_coverage(self): # float value for periods - expected = pd.interval_range(start=0, periods=10) - result = pd.interval_range(start=0, periods=10.5) + expected = interval_range(start=0, periods=10) + result = interval_range(start=0, periods=10.5) tm.assert_index_equal(result, expected) # equivalent timestamp-like start/end start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') - expected = pd.interval_range(start=start, end=end) + expected = interval_range(start=start, end=end) - result = pd.interval_range(start=start.to_pydatetime(), - 
end=end.to_pydatetime()) + result = interval_range(start=start.to_pydatetime(), + end=end.to_pydatetime()) tm.assert_index_equal(result, expected) - result = pd.interval_range(start=start.asm8, end=end.asm8) + result = interval_range(start=start.asm8, end=end.asm8) tm.assert_index_equal(result, expected) # equivalent freq with timestamp equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), DateOffset(days=1)] for freq in equiv_freq: - result = pd.interval_range(start=start, end=end, freq=freq) + result = interval_range(start=start, end=end, freq=freq) tm.assert_index_equal(result, expected) # equivalent timedelta-like start/end start, end = Timedelta(days=1), Timedelta(days=10) - expected = pd.interval_range(start=start, end=end) + expected = interval_range(start=start, end=end) - result = pd.interval_range(start=start.to_pytimedelta(), - end=end.to_pytimedelta()) + result = interval_range(start=start.to_pytimedelta(), + end=end.to_pytimedelta()) tm.assert_index_equal(result, expected) - result = pd.interval_range(start=start.asm8, end=end.asm8) + result = interval_range(start=start.asm8, end=end.asm8) tm.assert_index_equal(result, expected) # equivalent freq with timedelta equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] for freq in equiv_freq: - result = pd.interval_range(start=start, end=end, freq=freq) + result = interval_range(start=start, end=end, freq=freq) tm.assert_index_equal(result, expected) def test_errors(self): # not enough params - msg = ('Of the three parameters: start, end, and periods, ' - 'exactly two must be specified') + msg = ('Of the four parameters: start, end, periods, and freq, ' + 'exactly three must be specified') with tm.assert_raises_regex(ValueError, msg): interval_range(start=0) @@ -232,7 +230,7 @@ def test_errors(self): # too many params with tm.assert_raises_regex(ValueError, msg): - interval_range(start=0, end=5, periods=6) + interval_range(start=0, end=5, periods=6, freq=1.5) # mixed units msg = 'start, 
end, freq need to be type compatible' diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 784ef845fea10..87dff74cd04d7 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,9 +1,9 @@ +import pytest import numpy as np import pandas as pd import pandas.util.testing as tm from pandas.tseries.offsets import Day, Second from pandas import to_timedelta, timedelta_range -from pandas.util.testing import assert_frame_equal class TestTimedeltas(object): @@ -46,12 +46,20 @@ def test_timedelta_range(self): df.index = pd.timedelta_range(start='0s', periods=10, freq='s') expected = df.loc[pd.Timedelta('0s'):, :] result = df.loc['0s':, :] - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize('periods, freq', [ + (3, '2D'), (5, 'D'), (6, '19H12T'), (7, '16H'), (9, '12H')]) + def test_linspace_behavior(self, periods, freq): + # GH 20976 + result = timedelta_range(start='0 days', end='4 days', periods=periods) + expected = timedelta_range(start='0 days', end='4 days', freq=freq) + tm.assert_index_equal(result, expected) def test_errors(self): # not enough params - msg = ('Of the three parameters: start, end, and periods, ' - 'exactly two must be specified') + msg = ('Of the four parameters: start, end, periods, and freq, ' + 'exactly three must be specified') with tm.assert_raises_regex(ValueError, msg): timedelta_range(start='0 days') @@ -66,4 +74,4 @@ def test_errors(self): # too many params with tm.assert_raises_regex(ValueError, msg): - timedelta_range(start='0 days', end='5 days', periods=10) + timedelta_range(start='0 days', end='5 days', periods=10, freq='H')