Skip to content

Commit bcf7f0d

Browse files
committed
API: add is_beg_month/quarter/year, is_end_month/quarter/year accessors (pandas-dev#4565)
1 parent ad49095 commit bcf7f0d

File tree

10 files changed

+434
-30
lines changed

10 files changed

+434
-30
lines changed

doc/source/api.rst

+6
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,12 @@ Time/Date Components
11431143
DatetimeIndex.tz
11441144
DatetimeIndex.freq
11451145
DatetimeIndex.freqstr
1146+
DatetimeIndex.is_month_start
1147+
DatetimeIndex.is_month_end
1148+
DatetimeIndex.is_quarter_start
1149+
DatetimeIndex.is_quarter_end
1150+
DatetimeIndex.is_year_start
1151+
DatetimeIndex.is_year_end
11461152

11471153
Selecting
11481154
~~~~~~~~~

doc/source/release.rst

+4
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,10 @@ API Changes
470470
- ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a
471471
``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is
472472
empty (:issue:`6007`).
473+
- Add ``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``,
474+
``is_year_start``, ``is_year_end`` accessors for ``DatetimeIndex``/``Timestamp`` which return a boolean array
475+
of whether the timestamp(s) are at the start/end of the month/quarter/year defined by the
476+
frequency of the ``DatetimeIndex``/``Timestamp`` (:issue:`4565`, :issue:`6998`)
473477

474478
Experimental Features
475479
~~~~~~~~~~~~~~~~~~~~~

doc/source/timeseries.rst

+33
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,39 @@ regularity will result in a ``DatetimeIndex`` (but frequency is lost):
408408
409409
.. _timeseries.offsets:
410410

411+
Time/Date Components
412+
~~~~~~~~~~~~~~~~~~~~~~~~~~~
413+
414+
There are several time/date properties that one can access from a ``Timestamp`` or from a collection of timestamps such as a ``DatetimeIndex``.
415+
416+
.. csv-table::
417+
:header: "Property", "Description"
418+
:widths: 15, 65
419+
420+
year, "The year of the datetime"
421+
month,"The month of the datetime"
422+
day,"The days of the datetime"
423+
hour,"The hour of the datetime"
424+
minute,"The minutes of the datetime"
425+
second,"The seconds of the datetime"
426+
microsecond,"The microseconds of the datetime"
427+
nanosecond,"The nanoseconds of the datetime"
428+
date,"Returns datetime.date"
429+
time,"Returns datetime.time"
430+
dayofyear,"The ordinal day of year"
431+
weekofyear,"The week ordinal of the year"
432+
week,"The week ordinal of the year"
433+
dayofweek,"The day of the week with Monday=0, Sunday=6"
434+
weekday,"The day of the week with Monday=0, Sunday=6"
435+
quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc."
436+
is_month_start,"Logical indicating if first day of month (defined by frequency)"
437+
is_month_end,"Logical indicating if last day of month (defined by frequency)"
438+
is_quarter_start,"Logical indicating if first day of quarter (defined by frequency)"
439+
is_quarter_end,"Logical indicating if last day of quarter (defined by frequency)"
440+
is_year_start,"Logical indicating if first day of year (defined by frequency)"
441+
is_year_end,"Logical indicating if last day of year (defined by frequency)"
442+
443+
411444
DateOffset objects
412445
------------------
413446

doc/source/v0.14.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ API changes
8989
s.year
9090
s.index.year
9191

92+
- Add ``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``, ``is_year_start``, ``is_year_end`` accessors for ``DatetimeIndex``/``Timestamp`` which return a boolean array of whether the timestamp(s) are at the start/end of the month/quarter/year defined by the frequency of the ``DatetimeIndex``/``Timestamp`` (:issue:`4565`, :issue:`6998`)
93+
9294
- More consistent behaviour for some groupby methods:
9395

9496
groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation:

pandas/core/base.py

+6
Original file line numberDiff line numberDiff line change
@@ -336,3 +336,9 @@ def nunique(self):
336336
dayofyear = _field_accessor('dayofyear', "The ordinal day of the year")
337337
quarter = _field_accessor('quarter', "The quarter of the date")
338338
qyear = _field_accessor('qyear')
339+
is_month_start = _field_accessor('is_month_start', "Logical indicating if first day of month (defined by frequency)")
340+
is_month_end = _field_accessor('is_month_end', "Logical indicating if last day of month (defined by frequency)")
341+
is_quarter_start = _field_accessor('is_quarter_start', "Logical indicating if first day of quarter (defined by frequency)")
342+
is_quarter_end = _field_accessor('is_quarter_end', "Logical indicating if last day of quarter (defined by frequency)")
343+
is_year_start = _field_accessor('is_year_start', "Logical indicating if first day of year (defined by frequency)")
344+
is_year_end = _field_accessor('is_year_end', "Logical indicating if last day of year (defined by frequency)")

pandas/tests/test_base.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def test_value_counts_unique_nunique(self):
214214
# freq must be specified because repeat makes freq ambiguous
215215
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
216216
else:
217-
o = klass(np.repeat(values, range(1, len(o) + 1)))
217+
o = klass(np.repeat(values, range(1, len(o) + 1)))
218218

219219
expected_s = Series(range(10, 0, -1), index=values[::-1], dtype='int64')
220220
tm.assert_series_equal(o.value_counts(), expected_s)
@@ -246,7 +246,7 @@ def test_value_counts_unique_nunique(self):
246246
if isinstance(o, PeriodIndex):
247247
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
248248
else:
249-
o = klass(np.repeat(values, range(1, len(o) + 1)))
249+
o = klass(np.repeat(values, range(1, len(o) + 1)))
250250

251251
if isinstance(o, DatetimeIndex):
252252
# DatetimeIndex: nan is casted to Nat and included
@@ -278,7 +278,7 @@ def test_value_counts_inferred(self):
278278
s = klass(s_values)
279279
expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
280280
tm.assert_series_equal(s.value_counts(), expected)
281-
281+
282282
self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
283283
self.assertEquals(s.nunique(), 4)
284284
# don't sort, have to sort after the fact as not sorting is platform-dep
@@ -410,7 +410,7 @@ def setUp(self):
410410

411411
def test_ops_properties(self):
412412
self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter'])
413-
self.check_ops_properties(['date','time','microsecond','nanosecond'], lambda x: isinstance(x,DatetimeIndex))
413+
self.check_ops_properties(['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end'], lambda x: isinstance(x,DatetimeIndex))
414414

415415
class TestPeriodIndexOps(Ops):
416416
_allowed = '_allow_period_index_ops'

pandas/tseries/index.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pandas.compat import u
1515
from pandas.tseries.frequencies import (
1616
infer_freq, to_offset, get_period_alias,
17-
Resolution, get_reso_string)
17+
Resolution, get_reso_string, get_offset)
1818
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
1919
from pandas.tseries.tools import parse_time_string, normalize_date
2020
from pandas.util.decorators import cache_readonly
@@ -28,6 +28,7 @@
2828
import pandas.algos as _algos
2929
import pandas.index as _index
3030

31+
from pandas.tslib import isleapyear
3132

3233
def _utc():
3334
import pytz
@@ -43,7 +44,14 @@ def f(self):
4344
utc = _utc()
4445
if self.tz is not utc:
4546
values = self._local_timestamps()
46-
return tslib.get_date_field(values, field)
47+
if field in ['is_month_start', 'is_month_end',
48+
'is_quarter_start', 'is_quarter_end',
49+
'is_year_start', 'is_year_end']:
50+
month_kw = self.freq.kwds.get('startingMonth', self.freq.kwds.get('month', 12)) if self.freq else 12
51+
freqstr = self.freqstr if self.freq else None
52+
return tslib.get_start_end_field(values, field, freqstr, month_kw)
53+
else:
54+
return tslib.get_date_field(values, field)
4755
f.__name__ = name
4856
f.__doc__ = docstring
4957
return property(f)
@@ -1439,6 +1447,12 @@ def freqstr(self):
14391447
_weekday = _dayofweek
14401448
_dayofyear = _field_accessor('dayofyear', 'doy')
14411449
_quarter = _field_accessor('quarter', 'q')
1450+
_is_month_start = _field_accessor('is_month_start', 'is_month_start')
1451+
_is_month_end = _field_accessor('is_month_end', 'is_month_end')
1452+
_is_quarter_start = _field_accessor('is_quarter_start', 'is_quarter_start')
1453+
_is_quarter_end = _field_accessor('is_quarter_end', 'is_quarter_end')
1454+
_is_year_start = _field_accessor('is_year_start', 'is_year_start')
1455+
_is_year_end = _field_accessor('is_year_end', 'is_year_end')
14421456

14431457
@property
14441458
def _time(self):
@@ -1774,6 +1788,7 @@ def to_julian_date(self):
17741788
self.nanosecond/3600.0/1e+9
17751789
)/24.0)
17761790

1791+
17771792
def _generate_regular_range(start, end, periods, offset):
17781793
if isinstance(offset, Tick):
17791794
stride = offset.nanos
@@ -1831,7 +1846,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None,
18311846
Frequency strings can have multiples, e.g. '5H'
18321847
tz : string or None
18331848
Time zone name for returning localized DatetimeIndex, for example
1834-
Asia/Hong_Kong
1849+
Asia/Hong_Kong
18351850
normalize : bool, default False
18361851
Normalize start/end dates to midnight before generating date range
18371852
name : str, default None

pandas/tseries/tests/test_timeseries.py

+100-22
Original file line numberDiff line numberDiff line change
@@ -1473,7 +1473,7 @@ def test_timestamp_fields(self):
14731473
# extra fields from DatetimeIndex like quarter and week
14741474
idx = tm.makeDateIndex(100)
14751475

1476-
fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter']
1476+
fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end']
14771477
for f in fields:
14781478
expected = getattr(idx, f)[-1]
14791479
result = getattr(Timestamp(idx[-1]), f)
@@ -2192,7 +2192,7 @@ def test_join_with_period_index(self):
21922192

21932193
class TestDatetime64(tm.TestCase):
21942194
"""
2195-
Also test supoprt for datetime64[ns] in Series / DataFrame
2195+
Also test support for datetime64[ns] in Series / DataFrame
21962196
"""
21972197

21982198
def setUp(self):
@@ -2202,37 +2202,115 @@ def setUp(self):
22022202

22032203
def test_datetimeindex_accessors(self):
22042204
dti = DatetimeIndex(
2205-
freq='Q-JAN', start=datetime(1997, 12, 31), periods=100)
2205+
freq='D', start=datetime(1998, 1, 1), periods=365)
22062206

22072207
self.assertEquals(dti.year[0], 1998)
22082208
self.assertEquals(dti.month[0], 1)
2209-
self.assertEquals(dti.day[0], 31)
2209+
self.assertEquals(dti.day[0], 1)
22102210
self.assertEquals(dti.hour[0], 0)
22112211
self.assertEquals(dti.minute[0], 0)
22122212
self.assertEquals(dti.second[0], 0)
22132213
self.assertEquals(dti.microsecond[0], 0)
2214-
self.assertEquals(dti.dayofweek[0], 5)
2214+
self.assertEquals(dti.dayofweek[0], 3)
22152215

2216-
self.assertEquals(dti.dayofyear[0], 31)
2217-
self.assertEquals(dti.dayofyear[1], 120)
2216+
self.assertEquals(dti.dayofyear[0], 1)
2217+
self.assertEquals(dti.dayofyear[120], 121)
22182218

2219-
self.assertEquals(dti.weekofyear[0], 5)
2220-
self.assertEquals(dti.weekofyear[1], 18)
2219+
self.assertEquals(dti.weekofyear[0], 1)
2220+
self.assertEquals(dti.weekofyear[120], 18)
22212221

22222222
self.assertEquals(dti.quarter[0], 1)
2223-
self.assertEquals(dti.quarter[1], 2)
2224-
2225-
self.assertEquals(len(dti.year), 100)
2226-
self.assertEquals(len(dti.month), 100)
2227-
self.assertEquals(len(dti.day), 100)
2228-
self.assertEquals(len(dti.hour), 100)
2229-
self.assertEquals(len(dti.minute), 100)
2230-
self.assertEquals(len(dti.second), 100)
2231-
self.assertEquals(len(dti.microsecond), 100)
2232-
self.assertEquals(len(dti.dayofweek), 100)
2233-
self.assertEquals(len(dti.dayofyear), 100)
2234-
self.assertEquals(len(dti.weekofyear), 100)
2235-
self.assertEquals(len(dti.quarter), 100)
2223+
self.assertEquals(dti.quarter[120], 2)
2224+
2225+
self.assertEquals(dti.is_month_start[0], True)
2226+
self.assertEquals(dti.is_month_start[1], False)
2227+
self.assertEquals(dti.is_month_start[31], True)
2228+
self.assertEquals(dti.is_quarter_start[0], True)
2229+
self.assertEquals(dti.is_quarter_start[90], True)
2230+
self.assertEquals(dti.is_year_start[0], True)
2231+
self.assertEquals(dti.is_year_start[364], False)
2232+
self.assertEquals(dti.is_month_end[0], False)
2233+
self.assertEquals(dti.is_month_end[30], True)
2234+
self.assertEquals(dti.is_month_end[31], False)
2235+
self.assertEquals(dti.is_month_end[364], True)
2236+
self.assertEquals(dti.is_quarter_end[0], False)
2237+
self.assertEquals(dti.is_quarter_end[30], False)
2238+
self.assertEquals(dti.is_quarter_end[89], True)
2239+
self.assertEquals(dti.is_quarter_end[364], True)
2240+
self.assertEquals(dti.is_year_end[0], False)
2241+
self.assertEquals(dti.is_year_end[364], True)
2242+
2243+
self.assertEquals(len(dti.year), 365)
2244+
self.assertEquals(len(dti.month), 365)
2245+
self.assertEquals(len(dti.day), 365)
2246+
self.assertEquals(len(dti.hour), 365)
2247+
self.assertEquals(len(dti.minute), 365)
2248+
self.assertEquals(len(dti.second), 365)
2249+
self.assertEquals(len(dti.microsecond), 365)
2250+
self.assertEquals(len(dti.dayofweek), 365)
2251+
self.assertEquals(len(dti.dayofyear), 365)
2252+
self.assertEquals(len(dti.weekofyear), 365)
2253+
self.assertEquals(len(dti.quarter), 365)
2254+
self.assertEquals(len(dti.is_month_start), 365)
2255+
self.assertEquals(len(dti.is_month_end), 365)
2256+
self.assertEquals(len(dti.is_quarter_start), 365)
2257+
self.assertEquals(len(dti.is_quarter_end), 365)
2258+
self.assertEquals(len(dti.is_year_start), 365)
2259+
self.assertEquals(len(dti.is_year_end), 365)
2260+
2261+
dti = DatetimeIndex(
2262+
freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4)
2263+
2264+
self.assertEquals(sum(dti.is_quarter_start), 0)
2265+
self.assertEquals(sum(dti.is_quarter_end), 4)
2266+
self.assertEquals(sum(dti.is_year_start), 0)
2267+
self.assertEquals(sum(dti.is_year_end), 1)
2268+
2269+
# Ensure is_start/end accessors throw ValueError for CustomBusinessDay, CBD requires np >= 1.7
2270+
if not _np_version_under1p7:
2271+
bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
2272+
dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
2273+
self.assertRaises(ValueError, lambda: dti.is_month_start)
2274+
2275+
dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
2276+
2277+
self.assertEquals(dti.is_month_start[0], 1)
2278+
2279+
tests = [
2280+
(Timestamp('2013-06-01', offset='M').is_month_start, 1),
2281+
(Timestamp('2013-06-01', offset='BM').is_month_start, 0),
2282+
(Timestamp('2013-06-03', offset='M').is_month_start, 0),
2283+
(Timestamp('2013-06-03', offset='BM').is_month_start, 1),
2284+
(Timestamp('2013-02-28', offset='Q-FEB').is_month_end, 1),
2285+
(Timestamp('2013-02-28', offset='Q-FEB').is_quarter_end, 1),
2286+
(Timestamp('2013-02-28', offset='Q-FEB').is_year_end, 1),
2287+
(Timestamp('2013-03-01', offset='Q-FEB').is_month_start, 1),
2288+
(Timestamp('2013-03-01', offset='Q-FEB').is_quarter_start, 1),
2289+
(Timestamp('2013-03-01', offset='Q-FEB').is_year_start, 1),
2290+
(Timestamp('2013-03-31', offset='QS-FEB').is_month_end, 1),
2291+
(Timestamp('2013-03-31', offset='QS-FEB').is_quarter_end, 0),
2292+
(Timestamp('2013-03-31', offset='QS-FEB').is_year_end, 0),
2293+
(Timestamp('2013-02-01', offset='QS-FEB').is_month_start, 1),
2294+
(Timestamp('2013-02-01', offset='QS-FEB').is_quarter_start, 1),
2295+
(Timestamp('2013-02-01', offset='QS-FEB').is_year_start, 1),
2296+
(Timestamp('2013-06-30', offset='BQ').is_month_end, 0),
2297+
(Timestamp('2013-06-30', offset='BQ').is_quarter_end, 0),
2298+
(Timestamp('2013-06-30', offset='BQ').is_year_end, 0),
2299+
(Timestamp('2013-06-28', offset='BQ').is_month_end, 1),
2300+
(Timestamp('2013-06-28', offset='BQ').is_quarter_end, 1),
2301+
(Timestamp('2013-06-28', offset='BQ').is_year_end, 0),
2302+
(Timestamp('2013-06-30', offset='BQS-APR').is_month_end, 0),
2303+
(Timestamp('2013-06-30', offset='BQS-APR').is_quarter_end, 0),
2304+
(Timestamp('2013-06-30', offset='BQS-APR').is_year_end, 0),
2305+
(Timestamp('2013-06-28', offset='BQS-APR').is_month_end, 1),
2306+
(Timestamp('2013-06-28', offset='BQS-APR').is_quarter_end, 1),
2307+
(Timestamp('2013-03-29', offset='BQS-APR').is_year_end, 1),
2308+
(Timestamp('2013-11-01', offset='AS-NOV').is_year_start, 1),
2309+
(Timestamp('2013-10-31', offset='AS-NOV').is_year_end, 1)]
2310+
2311+
for ts, value in tests:
2312+
self.assertEquals(ts, value)
2313+
22362314

22372315
def test_nanosecond_field(self):
22382316
dti = DatetimeIndex(np.arange(10))

0 commit comments

Comments
 (0)