From eb974ce814997323908675f277ae0ea628c565de Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 17 Apr 2018 21:03:55 +0100 Subject: [PATCH 1/3] add Notes, Examples and See Also to at_time/between_time/first/last --- pandas/core/generic.py | 99 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 343f36eabc0d7..949116eaff8f6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6761,6 +6761,10 @@ def at_time(self, time, asof=False): """ Select values at particular time of day (e.g. 9:30AM). + Notes + ----- + For this method to work, the index must to be a :class:`DatetimeIndex` + Parameters ---------- time : datetime.time or string @@ -6768,6 +6772,28 @@ def at_time(self, time, asof=False): Returns ------- values_at_time : type of caller + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='4D2min') + >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts + A + date + 2018-04-09 00:00:00 1 + 2018-04-13 00:02:00 2 + 2018-04-17 00:04:00 3 + 2018-04-21 00:06:00 4 + >>> ts.at_time('0:02') + A + date + 2018-04-13 00:02:00 2 + + See Also + -------- + between_time : Select values between particular times of the day + first : Select initial periods of time series based on a date offset + last : Select final periods of time series based on a date offset """ try: indexer = self.index.indexer_at_time(time, asof=asof) @@ -6780,6 +6806,10 @@ def between_time(self, start_time, end_time, include_start=True, """ Select values between particular times of the day (e.g., 9:00-9:30 AM). 
+ Notes + ----- + For this method to work, the index must to be a :class:`DatetimeIndex` + Parameters ---------- start_time : datetime.time or string @@ -6790,6 +6820,27 @@ def between_time(self, start_time, end_time, include_start=True, Returns ------- values_between_time : type of caller + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='2min') + >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts + A + 2018-04-09 00:00:00 1 + 2018-04-09 00:02:00 2 + 2018-04-09 00:04:00 3 + 2018-04-09 00:06:00 4 + >>> ts.between_time('0:02', '0:04') + A + 2018-04-09 00:02:00 2 + 2018-04-09 00:04:00 3 + + See Also + -------- + at_time : Select values at a particular time of the day + first : Select initial periods of time series based on a date offset + last : Select final periods of time series based on a date offset """ try: indexer = self.index.indexer_between_time( @@ -7043,17 +7094,39 @@ def first(self, offset): Convenience method for subsetting initial periods of time series data based on a date offset. 
+ Notes + ----- + For this method to work, the index must to be a :class:`DatetimeIndex` + Parameters ---------- offset : string, DateOffset, dateutil.relativedelta Examples -------- - ts.first('10D') -> First 10 days + >>> i = pd.date_range('2018-04-09', periods=4, freq='D') + >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-10 2 + 2018-04-11 3 + 2018-04-12 4 + >>> ts.first('3D') + A + 2018-04-09 1 + 2018-04-10 2 + 2018-04-11 3 Returns ------- subset : type of caller + + See Also + -------- + last : Select final periods of time series based on a date offset + at_time : Select values at a particular time of the day + between_time : Select values between particular times of the day """ from pandas.tseries.frequencies import to_offset if not isinstance(self.index, DatetimeIndex): @@ -7079,17 +7152,39 @@ def last(self, offset): Convenience method for subsetting final periods of time series data based on a date offset. + Notes + ----- + For this method to work, the index must to be a :class:`DatetimeIndex` + Parameters ---------- offset : string, DateOffset, dateutil.relativedelta Examples -------- - ts.last('5M') -> Last 5 months + >>> i = pd.date_range('2018-04-09', periods=4, freq='D') + >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-10 2 + 2018-04-11 3 + 2018-04-12 4 + >>> ts.last('3D') + A + 2018-04-10 2 + 2018-04-11 3 + 2018-04-12 4 Returns ------- subset : type of caller + + See Also + -------- + first : Select initial periods of time series based on a date offset + at_time : Select values at a particular time of the day + between_time : Select values between particular times of the day """ from pandas.tseries.frequencies import to_offset if not isinstance(self.index, DatetimeIndex): From 295323cc08f04cf907dea24c7881699a95b07572 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 21 Apr 2018 15:12:33 +0100 Subject: [PATCH 2/3] Improve doc strings, also indexer_at/between/_time --- 
doc/source/whatsnew/v0.23.0.txt | 4 ++ pandas/core/generic.py | 118 +++++++++++++++++++------------ pandas/core/indexes/datetimes.py | 27 ++++--- 3 files changed, 96 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index eb6c212731822..bf148c80d1552 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -916,6 +916,10 @@ Datetimelike API Changes - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) - ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- :attr:`Series.at_time` and :attr:`DataFrame.at_time` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex`` (:issue:`20725`). +- :attr:`Series.between_time` and :attr:`DateFrame.between_time` will now raise + a ``TypeError`` rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex`` (:issue:`20725`). - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). 
- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 949116eaff8f6..9e4eda1bc4dc7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6761,9 +6761,10 @@ def at_time(self, time, asof=False): """ Select values at particular time of day (e.g. 9:30AM). - Notes - ----- - For this method to work, the index must to be a :class:`DatetimeIndex` + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` Parameters ---------- @@ -6775,25 +6776,27 @@ def at_time(self, time, asof=False): Examples -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='4D2min') + >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) >>> ts A - date 2018-04-09 00:00:00 1 - 2018-04-13 00:02:00 2 - 2018-04-17 00:04:00 3 - 2018-04-21 00:06:00 4 - >>> ts.at_time('0:02') + 2018-04-09 12:00:00 2 + 2018-04-10 00:00:00 3 + 2018-04-10 12:00:00 4 + + >>> ts.at_time('12:00') A - date - 2018-04-13 00:02:00 2 + 2018-04-09 12:00:00 2 + 2018-04-10 12:00:00 4 See Also -------- between_time : Select values between particular times of the day first : Select initial periods of time series based on a date offset last : Select final periods of time series based on a date offset + DatetimeIndex.indexer_at_time : Get just the index locations for + values at particular time of the day """ try: indexer = self.index.indexer_at_time(time, asof=asof) @@ -6806,9 +6809,13 @@ def between_time(self, start_time, end_time, include_start=True, """ Select values between particular times of the day (e.g., 9:00-9:30 AM). 
- Notes - ----- - For this method to work, the index must to be a :class:`DatetimeIndex` + By setting ``start_time`` to be later than ``end_time``, + you can get the times that are *not* between the two times. + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` Parameters ---------- @@ -6823,24 +6830,35 @@ def between_time(self, start_time, end_time, include_start=True, Examples -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='2min') + >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) >>> ts A 2018-04-09 00:00:00 1 - 2018-04-09 00:02:00 2 - 2018-04-09 00:04:00 3 - 2018-04-09 00:06:00 4 - >>> ts.between_time('0:02', '0:04') + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + 2018-04-12 01:00:00 4 + + >>> ts.between_time('0:15', '0:45') A - 2018-04-09 00:02:00 2 - 2018-04-09 00:04:00 3 + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + + You get the times that are *not* between two times by setting + ``start_time`` later than ``end_time``: + + >>> ts.between_time('0:45', '0:15') + A + 2018-04-09 00:00:00 1 + 2018-04-12 01:00:00 4 See Also -------- at_time : Select values at a particular time of the day first : Select initial periods of time series based on a date offset last : Select final periods of time series based on a date offset + DatetimeIndex.indexer_between_time : Get just the index locations for + values between particular times of the day """ try: indexer = self.index.indexer_between_time( @@ -7094,9 +7112,10 @@ def first(self, offset): Convenience method for subsetting initial periods of time series data based on a date offset. 
- Notes - ----- - For this method to work, the index must to be a :class:`DatetimeIndex` + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` Parameters ---------- @@ -7104,19 +7123,25 @@ def first(self, offset): Examples -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='D') + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) >>> ts A 2018-04-09 1 - 2018-04-10 2 - 2018-04-11 3 - 2018-04-12 4 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the first 3 days: + >>> ts.first('3D') A 2018-04-09 1 - 2018-04-10 2 - 2018-04-11 3 + 2018-04-11 2 + + Notice the data for 3 first calendar days were returned, not the first + 3 days observed in the dataset, and therefore data for 2018-04-13 was + not returned. Returns ------- @@ -7130,8 +7155,7 @@ def first(self, offset): """ from pandas.tseries.frequencies import to_offset if not isinstance(self.index, DatetimeIndex): - raise NotImplementedError("'first' only supports a DatetimeIndex " - "index") + raise TypeError("'first' only supports a DatetimeIndex index") if len(self.index) == 0: return self @@ -7152,9 +7176,10 @@ def last(self, offset): Convenience method for subsetting final periods of time series data based on a date offset. 
- Notes - ----- - For this method to work, the index must to be a :class:`DatetimeIndex` + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` Parameters ---------- @@ -7162,19 +7187,25 @@ def last(self, offset): Examples -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='D') + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) >>> ts A 2018-04-09 1 - 2018-04-10 2 - 2018-04-11 3 - 2018-04-12 4 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the last 3 days: + >>> ts.last('3D') A - 2018-04-10 2 - 2018-04-11 3 - 2018-04-12 4 + 2018-04-13 3 + 2018-04-15 4 + + Notice the data for 3 last calendar days were returned, not the last + 3 observed days in the dataset, and therefore data for 2018-04-11 was + not returned. Returns ------- @@ -7188,8 +7219,7 @@ def last(self, offset): """ from pandas.tseries.frequencies import to_offset if not isinstance(self.index, DatetimeIndex): - raise NotImplementedError("'last' only supports a DatetimeIndex " - "index") + raise TypeError("'last' only supports a DatetimeIndex index") if len(self.index) == 0: return self diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e9ab443a978f8..1b5aa3b45f3b5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2368,15 +2368,23 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): def indexer_at_time(self, time, asof=False): """ - Select values at particular time of day (e.g. 9:30AM) + Returns index locations of index values at particular time of day + (e.g. 9:30AM). Parameters ---------- time : datetime.time or string + datetime.time or string in appropriate format ("%H:%M", "%H%M", + "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", + "%I%M%S%p"). 
Returns ------- - values_at_time : TimeSeries + values_at_time : array of integers + + See Also + -------- + indexer_between_time, DataFrame.at_time """ from dateutil.parser import parse @@ -2398,24 +2406,25 @@ def indexer_at_time(self, time, asof=False): def indexer_between_time(self, start_time, end_time, include_start=True, include_end=True): """ - Select values between particular times of day (e.g., 9:00-9:30AM). - - Return values of the index between two times. If start_time or - end_time are strings then tseries.tools.to_time is used to convert to - a time object. + Return index locations of values between particular times of day + (e.g., 9:00-9:30AM). Parameters ---------- start_time, end_time : datetime.time, str datetime.time or string in appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", - "%I%M%S%p") + "%I%M%S%p"). include_start : boolean, default True include_end : boolean, default True Returns ------- - values_between_time : TimeSeries + values_between_time : array of integers + + See Also + -------- + indexer_at_time, DataFrame.between_time """ start_time = tools.to_time(start_time) end_time = tools.to_time(end_time) From 18025cf978da507fe94f2aefdf9f3a151981ce9d Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 5 May 2018 15:37:54 +0100 Subject: [PATCH 3/3] Add tests for TypeError when index isn't a DatetimeIndex --- doc/source/whatsnew/v0.23.0.txt | 6 +-- pandas/tests/frame/test_timeseries.py | 68 +++++++++++++++++++++++++- pandas/tests/series/test_timeseries.py | 24 +++++++++ 3 files changed, 93 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bf148c80d1552..05e0028047941 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -916,10 +916,10 @@ Datetimelike API Changes - :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) - 
``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) -- :attr:`Series.at_time` and :attr:`DataFrame.at_time` will now raise a ``TypeError`` +- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError`` rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex`` (:issue:`20725`). -- :attr:`Series.between_time` and :attr:`DateFrame.between_time` will now raise - a ``TypeError`` rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex`` (:issue:`20725`). - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). 
- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 277c3c9bc5c23..90fbc6e628369 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -539,7 +539,59 @@ def test_first_last_valid(self): assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq - def test_at_time_frame(self): + def test_first_subset(self): + ts = tm.makeTimeDataFrame(freq='12h') + result = ts.first('10d') + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(freq='D') + result = ts.first('10d') + assert len(result) == 10 + + result = ts.first('3M') + expected = ts[:'3/31/2000'] + assert_frame_equal(result, expected) + + result = ts.first('21D') + expected = ts[:21] + assert_frame_equal(result, expected) + + result = ts[:0].first('3M') + assert_frame_equal(result, ts[:0]) + + def test_first_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.first('1D') + + def test_last_subset(self): + ts = tm.makeTimeDataFrame(freq='12h') + result = ts.last('10d') + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(nper=30, freq='D') + result = ts.last('10d') + assert len(result) == 10 + + result = ts.last('21D') + expected = ts['2000-01-10':] + assert_frame_equal(result, expected) + + result = ts.last('21D') + expected = ts[-21:] + assert_frame_equal(result, expected) + + result = ts[:0].last('3M') + assert_frame_equal(result, ts[:0]) + + def test_last_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + 
with pytest.raises(TypeError): # index is not a DatetimeIndex + df.last('1D') + + def test_at_time(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) rs = ts.at_time(rng[1]) @@ -569,7 +621,13 @@ def test_at_time_frame(self): rs = ts.at_time('16:00') assert len(rs) == 0 - def test_between_time_frame(self): + def test_at_time_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.at_time('00:00') + + def test_between_time(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) stime = time(0, 0) @@ -629,6 +687,12 @@ def test_between_time_frame(self): else: assert (t < etime) or (t >= stime) + def test_between_time_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.between_time(start_time='00:00', end_time='12:00') + def test_operation_on_NaT(self): # Both NaT and Timestamp are in DataFrame. 
df = pd.DataFrame({'foo': [pd.NaT, pd.NaT, diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 8e537b137baaf..376b4d71f81e8 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -628,6 +628,12 @@ def test_first_subset(self): result = ts[:0].first('3M') assert_series_equal(result, ts[:0]) + def test_first_raises(self): + # GH20725 + ser = pd.Series('a b c'.split()) + with pytest.raises(TypeError): # index is not a DatetimeIndex + ser.first('1D') + def test_last_subset(self): ts = _simple_ts('1/1/2000', '1/1/2010', freq='12h') result = ts.last('10d') @@ -648,6 +654,12 @@ def test_last_subset(self): result = ts[:0].last('3M') assert_series_equal(result, ts[:0]) + def test_last_raises(self): + # GH20725 + ser = pd.Series('a b c'.split()) + with pytest.raises(TypeError): # index is not a DatetimeIndex + ser.last('1D') + def test_format_pre_1900_dates(self): rng = date_range('1/1/1850', '1/1/1950', freq='A-DEC') rng.format() @@ -696,6 +708,12 @@ def test_at_time(self): rs = ts.at_time('16:00') assert len(rs) == 0 + def test_at_time_raises(self): + # GH20725 + ser = pd.Series('a b c'.split()) + with pytest.raises(TypeError): # index is not a DatetimeIndex + ser.at_time('00:00') + def test_between(self): series = Series(date_range('1/1/2000', periods=10)) left, right = series[[2, 7]] @@ -764,6 +782,12 @@ def test_between_time(self): else: assert (t < etime) or (t >= stime) + def test_between_time_raises(self): + # GH20725 + ser = pd.Series('a b c'.split()) + with pytest.raises(TypeError): # index is not a DatetimeIndex + ser.between_time(start_time='00:00', end_time='12:00') + def test_between_time_types(self): # GH11818 rng = date_range('1/1/2000', '1/5/2000', freq='5min')