Skip to content

DOC: add Raises, Examples and See Also sections to methods at_time/between_time/first/last #20725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,10 @@ Datetimelike API Changes
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError``
rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`).
- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError``
rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`).
- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).
- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`)
- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`)
Expand Down
137 changes: 131 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6761,13 +6761,42 @@ def at_time(self, time, asof=False):
"""
Select values at particular time of day (e.g. 9:30AM).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

put this in a Raises section instead

Raises
------
TypeError
If the index is not a :class:`DatetimeIndex`

Parameters
----------
time : datetime.time or string

Returns
-------
values_at_time : type of caller

Examples
--------
>>> i = pd.date_range('2018-04-09', periods=4, freq='12H')
>>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
>>> ts
A
2018-04-09 00:00:00 1
2018-04-09 12:00:00 2
2018-04-10 00:00:00 3
2018-04-10 12:00:00 4

>>> ts.at_time('12:00')
A
2018-04-09 12:00:00 2
2018-04-10 12:00:00 4

See Also
--------
between_time : Select values between particular times of the day
first : Select initial periods of time series based on a date offset
last : Select final periods of time series based on a date offset
DatetimeIndex.indexer_at_time : Get just the index locations for
values at particular time of the day
"""
try:
indexer = self.index.indexer_at_time(time, asof=asof)
Expand All @@ -6780,6 +6809,14 @@ def between_time(self, start_time, end_time, include_start=True,
"""
Select values between particular times of the day (e.g., 9:00-9:30 AM).

By setting ``start_time`` to be later than ``end_time``,
you can get the times that are *not* between the two times.

Raises
------
TypeError
If the index is not a :class:`DatetimeIndex`

Parameters
----------
start_time : datetime.time or string
Expand All @@ -6790,6 +6827,38 @@ def between_time(self, start_time, end_time, include_start=True,
Returns
-------
values_between_time : type of caller

Examples
--------
>>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
>>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
>>> ts
A
2018-04-09 00:00:00 1
2018-04-10 00:20:00 2
2018-04-11 00:40:00 3
2018-04-12 01:00:00 4

>>> ts.between_time('0:15', '0:45')
A
2018-04-10 00:20:00 2
2018-04-11 00:40:00 3

You get the times that are *not* between two times by setting
``start_time`` later than ``end_time``:

>>> ts.between_time('0:45', '0:15')
A
2018-04-09 00:00:00 1
2018-04-12 01:00:00 4

See Also
--------
at_time : Select values at a particular time of the day
first : Select initial periods of time series based on a date offset
last : Select final periods of time series based on a date offset
DatetimeIndex.indexer_between_time : Get just the index locations for
values between particular times of the day
"""
try:
indexer = self.index.indexer_between_time(
Expand Down Expand Up @@ -7043,22 +7112,50 @@ def first(self, offset):
Convenience method for subsetting initial periods of time series data
based on a date offset.

Raises
------
TypeError
If the index is not a :class:`DatetimeIndex`

Parameters
----------
offset : string, DateOffset, dateutil.relativedelta

Examples
--------
ts.first('10D') -> First 10 days
>>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
>>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
>>> ts
A
2018-04-09 1
2018-04-11 2
2018-04-13 3
2018-04-15 4

Get the rows for the first 3 days:

>>> ts.first('3D')
A
2018-04-09 1
2018-04-11 2

Notice the data for the first 3 calendar days were returned, not the first
3 days observed in the dataset, and therefore data for 2018-04-13 was
not returned.

Returns
-------
subset : type of caller

See Also
--------
last : Select final periods of time series based on a date offset
at_time : Select values at a particular time of the day
between_time : Select values between particular times of the day
"""
from pandas.tseries.frequencies import to_offset
if not isinstance(self.index, DatetimeIndex):
raise NotImplementedError("'first' only supports a DatetimeIndex "
"index")
raise TypeError("'first' only supports a DatetimeIndex index")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add some tests that assert the TypeError? (was this not tested before)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They were not there beforehand, so I've added.

A question: There exists test for Series.first/last, but not DataFrame.first/last. Should i copy the relevant tests from series/test_timeseries.py to frame/test_time_series.py, or is that not relevant?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure that would be great (do a search for them as well, it may be that they are elsewhere, but they should be in test_timeseries)


if len(self.index) == 0:
return self
Expand All @@ -7079,22 +7176,50 @@ def last(self, offset):
Convenience method for subsetting final periods of time series data
based on a date offset.

Raises
------
TypeError
If the index is not a :class:`DatetimeIndex`

Parameters
----------
offset : string, DateOffset, dateutil.relativedelta

Examples
--------
ts.last('5M') -> Last 5 months
>>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
>>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i)
>>> ts
A
2018-04-09 1
2018-04-11 2
2018-04-13 3
2018-04-15 4

Get the rows for the last 3 days:

>>> ts.last('3D')
A
2018-04-13 3
2018-04-15 4

Notice the data for the last 3 calendar days were returned, not the last
3 observed days in the dataset, and therefore data for 2018-04-11 was
not returned.

Returns
-------
subset : type of caller

See Also
--------
first : Select initial periods of time series based on a date offset
at_time : Select values at a particular time of the day
between_time : Select values between particular times of the day
"""
from pandas.tseries.frequencies import to_offset
if not isinstance(self.index, DatetimeIndex):
raise NotImplementedError("'last' only supports a DatetimeIndex "
"index")
raise TypeError("'last' only supports a DatetimeIndex index")

if len(self.index) == 0:
return self
Expand Down
27 changes: 18 additions & 9 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2368,15 +2368,23 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'):

def indexer_at_time(self, time, asof=False):
"""
Select values at particular time of day (e.g. 9:30AM)
Return index locations of index values at particular time of day
(e.g. 9:30AM).

Parameters
----------
time : datetime.time or string
datetime.time or string in appropriate format ("%H:%M", "%H%M",
"%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
"%I%M%S%p").

Returns
-------
values_at_time : TimeSeries
values_at_time : array of integers

See Also
--------
indexer_between_time, DataFrame.at_time
"""
from dateutil.parser import parse

Expand All @@ -2398,24 +2406,25 @@ def indexer_at_time(self, time, asof=False):
def indexer_between_time(self, start_time, end_time, include_start=True,
include_end=True):
"""
Select values between particular times of day (e.g., 9:00-9:30AM).

Return values of the index between two times. If start_time or
end_time are strings then tseries.tools.to_time is used to convert to
a time object.
Return index locations of values between particular times of day
(e.g., 9:00-9:30AM).

Parameters
----------
start_time, end_time : datetime.time, str
datetime.time or string in appropriate format ("%H:%M", "%H%M",
"%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
"%I%M%S%p")
"%I%M%S%p").
include_start : boolean, default True
include_end : boolean, default True

Returns
-------
values_between_time : TimeSeries
values_between_time : array of integers

See Also
--------
indexer_at_time, DataFrame.between_time
"""
start_time = tools.to_time(start_time)
end_time = tools.to_time(end_time)
Expand Down
68 changes: 66 additions & 2 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,59 @@ def test_first_last_valid(self):
assert frame.first_valid_index().freq == frame.index.freq
assert frame.last_valid_index().freq == frame.index.freq

def test_at_time_frame(self):
def test_first_subset(self):
ts = tm.makeTimeDataFrame(freq='12h')
result = ts.first('10d')
assert len(result) == 20

ts = tm.makeTimeDataFrame(freq='D')
result = ts.first('10d')
assert len(result) == 10

result = ts.first('3M')
expected = ts[:'3/31/2000']
assert_frame_equal(result, expected)

result = ts.first('21D')
expected = ts[:21]
assert_frame_equal(result, expected)

result = ts[:0].first('3M')
assert_frame_equal(result, ts[:0])

def test_first_raises(self):
# GH20725
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
with pytest.raises(TypeError): # index is not a DatetimeIndex
df.first('1D')

def test_last_subset(self):
ts = tm.makeTimeDataFrame(freq='12h')
result = ts.last('10d')
assert len(result) == 20

ts = tm.makeTimeDataFrame(nper=30, freq='D')
result = ts.last('10d')
assert len(result) == 10

result = ts.last('21D')
expected = ts['2000-01-10':]
assert_frame_equal(result, expected)

result = ts.last('21D')
expected = ts[-21:]
assert_frame_equal(result, expected)

result = ts[:0].last('3M')
assert_frame_equal(result, ts[:0])

def test_last_raises(self):
# GH20725
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
with pytest.raises(TypeError): # index is not a DatetimeIndex
df.last('1D')

def test_at_time(self):
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
rs = ts.at_time(rng[1])
Expand Down Expand Up @@ -569,7 +621,13 @@ def test_at_time_frame(self):
rs = ts.at_time('16:00')
assert len(rs) == 0

def test_between_time_frame(self):
def test_at_time_raises(self):
# GH20725
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
with pytest.raises(TypeError): # index is not a DatetimeIndex
df.at_time('00:00')

def test_between_time(self):
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
stime = time(0, 0)
Expand Down Expand Up @@ -629,6 +687,12 @@ def test_between_time_frame(self):
else:
assert (t < etime) or (t >= stime)

def test_between_time_raises(self):
# GH20725
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
with pytest.raises(TypeError): # index is not a DatetimeIndex
df.between_time(start_time='00:00', end_time='12:00')

def test_operation_on_NaT(self):
# Both NaT and Timestamp are in DataFrame.
df = pd.DataFrame({'foo': [pd.NaT, pd.NaT,
Expand Down
Loading