diff --git a/doc/source/api.rst b/doc/source/api.rst index 7918d6930341a..aa5c58652d550 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1143,6 +1143,12 @@ Time/Date Components DatetimeIndex.tz DatetimeIndex.freq DatetimeIndex.freqstr + DatetimeIndex.is_month_start + DatetimeIndex.is_month_end + DatetimeIndex.is_quarter_start + DatetimeIndex.is_quarter_end + DatetimeIndex.is_year_start + DatetimeIndex.is_year_end Selecting ~~~~~~~~~ diff --git a/doc/source/release.rst b/doc/source/release.rst index 145100c110194..f54cc13a7d775 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -470,6 +470,10 @@ API Changes - ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is empty (:issue:`6007`). +- Add ``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``, + ``is_year_start``, ``is_year_end`` accessors for ``DatetimeIndex``/``Timestamp`` which return a boolean array + of whether the timestamp(s) are at the start/end of the month/quarter/year defined by the + frequency of the ``DatetimeIndex``/``Timestamp`` (:issue:`4565`, :issue:`6998`) Experimental Features ~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index e3070ff1507a2..1cae66fada587 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -408,6 +408,39 @@ regularity will result in a ``DatetimeIndex`` (but frequency is lost): .. _timeseries.offsets: +Time/Date Components +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DatetimeIndex``. + +.. 
csv-table:: + :header: "Property", "Description" + :widths: 15, 65 + + year, "The year of the datetime" + month,"The month of the datetime" + day,"The days of the datetime" + hour,"The hour of the datetime" + minute,"The minutes of the datetime" + second,"The seconds of the datetime" + microsecond,"The microseconds of the datetime" + nanosecond,"The nanoseconds of the datetime" + date,"Returns datetime.date" + time,"Returns datetime.time" + dayofyear,"The ordinal day of year" + weekofyear,"The week ordinal of the year" + week,"The week ordinal of the year" + dayofweek,"The day of the week with Monday=0, Sunday=6" + weekday,"The day of the week with Monday=0, Sunday=6" + quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." + is_month_start,"Logical indicating if first day of month (defined by frequency)" + is_month_end,"Logical indicating if last day of month (defined by frequency)" + is_quarter_start,"Logical indicating if first day of quarter (defined by frequency)" + is_quarter_end,"Logical indicating if last day of quarter (defined by frequency)" + is_year_start,"Logical indicating if first day of year (defined by frequency)" + is_year_end,"Logical indicating if last day of year (defined by frequency)" + + DateOffset objects ------------------ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 7962d21b85ecd..f281d40642063 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -89,6 +89,8 @@ API changes s.year s.index.year +- Add ``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``, ``is_year_start``, ``is_year_end`` accessors for ``DatetimeIndex``/``Timestamp`` which return a boolean array of whether the timestamp(s) are at the start/end of the month/quarter/year defined by the frequency of the ``DatetimeIndex``/``Timestamp`` (:issue:`4565`, :issue:`6998`) + - More consistent behaviour for some groupby methods: groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation: 
diff --git a/pandas/core/base.py b/pandas/core/base.py index ec6a4ffbcefbb..1e9adb60f534e 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -336,3 +336,9 @@ def nunique(self): dayofyear = _field_accessor('dayofyear', "The ordinal day of the year") quarter = _field_accessor('quarter', "The quarter of the date") qyear = _field_accessor('qyear') + is_month_start = _field_accessor('is_month_start', "Logical indicating if first day of month (defined by frequency)") + is_month_end = _field_accessor('is_month_end', "Logical indicating if last day of month (defined by frequency)") + is_quarter_start = _field_accessor('is_quarter_start', "Logical indicating if first day of quarter (defined by frequency)") + is_quarter_end = _field_accessor('is_quarter_end', "Logical indicating if last day of quarter (defined by frequency)") + is_year_start = _field_accessor('is_year_start', "Logical indicating if first day of year (defined by frequency)") + is_year_end = _field_accessor('is_year_end', "Logical indicating if last day of year (defined by frequency)") diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 036a868fe0451..81b3d4631bfbf 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -214,7 +214,7 @@ def test_value_counts_unique_nunique(self): # freq must be specified because repeat makes freq ambiguous o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq) else: - o = klass(np.repeat(values, range(1, len(o) + 1))) + o = klass(np.repeat(values, range(1, len(o) + 1))) expected_s = Series(range(10, 0, -1), index=values[::-1], dtype='int64') tm.assert_series_equal(o.value_counts(), expected_s) @@ -246,7 +246,7 @@ def test_value_counts_unique_nunique(self): if isinstance(o, PeriodIndex): o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq) else: - o = klass(np.repeat(values, range(1, len(o) + 1))) + o = klass(np.repeat(values, range(1, len(o) + 1))) if isinstance(o, DatetimeIndex): # DatetimeIndex: nan is 
casted to Nat and included @@ -278,7 +278,7 @@ def test_value_counts_inferred(self): s = klass(s_values) expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(s.value_counts(), expected) - + self.assert_numpy_array_equal(s.unique(), np.unique(s_values)) self.assertEquals(s.nunique(), 4) # don't sort, have to sort after the fact as not sorting is platform-dep @@ -410,7 +410,7 @@ def setUp(self): def test_ops_properties(self): self.check_ops_properties(['year','month','day','hour','minute','second','weekofyear','week','dayofweek','dayofyear','quarter']) - self.check_ops_properties(['date','time','microsecond','nanosecond'], lambda x: isinstance(x,DatetimeIndex)) + self.check_ops_properties(['date','time','microsecond','nanosecond', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end'], lambda x: isinstance(x,DatetimeIndex)) class TestPeriodIndexOps(Ops): _allowed = '_allow_period_index_ops' diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 6ac21e60ea7f3..a2e01c8110261 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -14,7 +14,7 @@ from pandas.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, - Resolution, get_reso_string) + Resolution, get_reso_string, get_offset) from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date from pandas.util.decorators import cache_readonly @@ -28,6 +28,7 @@ import pandas.algos as _algos import pandas.index as _index +from pandas.tslib import isleapyear def _utc(): import pytz @@ -43,7 +44,14 @@ def f(self): utc = _utc() if self.tz is not utc: values = self._local_timestamps() - return tslib.get_date_field(values, field) + if field in ['is_month_start', 'is_month_end', + 'is_quarter_start', 'is_quarter_end', + 'is_year_start', 'is_year_end']: + month_kw = self.freq.kwds.get('startingMonth', 
self.freq.kwds.get('month', 12)) if self.freq else 12 + freqstr = self.freqstr if self.freq else None + return tslib.get_start_end_field(values, field, freqstr, month_kw) + else: + return tslib.get_date_field(values, field) f.__name__ = name f.__doc__ = docstring return property(f) @@ -1439,6 +1447,12 @@ def freqstr(self): _weekday = _dayofweek _dayofyear = _field_accessor('dayofyear', 'doy') _quarter = _field_accessor('quarter', 'q') + _is_month_start = _field_accessor('is_month_start', 'is_month_start') + _is_month_end = _field_accessor('is_month_end', 'is_month_end') + _is_quarter_start = _field_accessor('is_quarter_start', 'is_quarter_start') + _is_quarter_end = _field_accessor('is_quarter_end', 'is_quarter_end') + _is_year_start = _field_accessor('is_year_start', 'is_year_start') + _is_year_end = _field_accessor('is_year_end', 'is_year_end') @property def _time(self): @@ -1774,6 +1788,7 @@ def to_julian_date(self): self.nanosecond/3600.0/1e+9 )/24.0) + def _generate_regular_range(start, end, periods, offset): if isinstance(offset, Tick): stride = offset.nanos @@ -1831,7 +1846,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Frequency strings can have multiples, e.g. 
'5H' tz : string or None Time zone name for returning localized DatetimeIndex, for example - Asia/Hong_Kong + Asia/Hong_Kong normalize : bool, default False Normalize start/end dates to midnight before generating date range name : str, default None diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index fc3ee993771d3..319eaee6d14df 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1473,7 +1473,7 @@ def test_timestamp_fields(self): # extra fields from DatetimeIndex like quarter and week idx = tm.makeDateIndex(100) - fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter'] + fields = ['dayofweek', 'dayofyear', 'week', 'weekofyear', 'quarter', 'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end', 'is_year_start', 'is_year_end'] for f in fields: expected = getattr(idx, f)[-1] result = getattr(Timestamp(idx[-1]), f) @@ -2192,7 +2192,7 @@ def test_join_with_period_index(self): class TestDatetime64(tm.TestCase): """ - Also test supoprt for datetime64[ns] in Series / DataFrame + Also test support for datetime64[ns] in Series / DataFrame """ def setUp(self): @@ -2202,37 +2202,115 @@ def setUp(self): def test_datetimeindex_accessors(self): dti = DatetimeIndex( - freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) + freq='D', start=datetime(1998, 1, 1), periods=365) self.assertEquals(dti.year[0], 1998) self.assertEquals(dti.month[0], 1) - self.assertEquals(dti.day[0], 31) + self.assertEquals(dti.day[0], 1) self.assertEquals(dti.hour[0], 0) self.assertEquals(dti.minute[0], 0) self.assertEquals(dti.second[0], 0) self.assertEquals(dti.microsecond[0], 0) - self.assertEquals(dti.dayofweek[0], 5) + self.assertEquals(dti.dayofweek[0], 3) - self.assertEquals(dti.dayofyear[0], 31) - self.assertEquals(dti.dayofyear[1], 120) + self.assertEquals(dti.dayofyear[0], 1) + self.assertEquals(dti.dayofyear[120], 121) - self.assertEquals(dti.weekofyear[0], 5) - 
self.assertEquals(dti.weekofyear[1], 18) + self.assertEquals(dti.weekofyear[0], 1) + self.assertEquals(dti.weekofyear[120], 18) self.assertEquals(dti.quarter[0], 1) - self.assertEquals(dti.quarter[1], 2) - - self.assertEquals(len(dti.year), 100) - self.assertEquals(len(dti.month), 100) - self.assertEquals(len(dti.day), 100) - self.assertEquals(len(dti.hour), 100) - self.assertEquals(len(dti.minute), 100) - self.assertEquals(len(dti.second), 100) - self.assertEquals(len(dti.microsecond), 100) - self.assertEquals(len(dti.dayofweek), 100) - self.assertEquals(len(dti.dayofyear), 100) - self.assertEquals(len(dti.weekofyear), 100) - self.assertEquals(len(dti.quarter), 100) + self.assertEquals(dti.quarter[120], 2) + + self.assertEquals(dti.is_month_start[0], True) + self.assertEquals(dti.is_month_start[1], False) + self.assertEquals(dti.is_month_start[31], True) + self.assertEquals(dti.is_quarter_start[0], True) + self.assertEquals(dti.is_quarter_start[90], True) + self.assertEquals(dti.is_year_start[0], True) + self.assertEquals(dti.is_year_start[364], False) + self.assertEquals(dti.is_month_end[0], False) + self.assertEquals(dti.is_month_end[30], True) + self.assertEquals(dti.is_month_end[31], False) + self.assertEquals(dti.is_month_end[364], True) + self.assertEquals(dti.is_quarter_end[0], False) + self.assertEquals(dti.is_quarter_end[30], False) + self.assertEquals(dti.is_quarter_end[89], True) + self.assertEquals(dti.is_quarter_end[364], True) + self.assertEquals(dti.is_year_end[0], False) + self.assertEquals(dti.is_year_end[364], True) + + self.assertEquals(len(dti.year), 365) + self.assertEquals(len(dti.month), 365) + self.assertEquals(len(dti.day), 365) + self.assertEquals(len(dti.hour), 365) + self.assertEquals(len(dti.minute), 365) + self.assertEquals(len(dti.second), 365) + self.assertEquals(len(dti.microsecond), 365) + self.assertEquals(len(dti.dayofweek), 365) + self.assertEquals(len(dti.dayofyear), 365) + self.assertEquals(len(dti.weekofyear), 365) + 
self.assertEquals(len(dti.quarter), 365) + self.assertEquals(len(dti.is_month_start), 365) + self.assertEquals(len(dti.is_month_end), 365) + self.assertEquals(len(dti.is_quarter_start), 365) + self.assertEquals(len(dti.is_quarter_end), 365) + self.assertEquals(len(dti.is_year_start), 365) + self.assertEquals(len(dti.is_year_end), 365) + + dti = DatetimeIndex( + freq='BQ-FEB', start=datetime(1998, 1, 1), periods=4) + + self.assertEquals(sum(dti.is_quarter_start), 0) + self.assertEquals(sum(dti.is_quarter_end), 4) + self.assertEquals(sum(dti.is_year_start), 0) + self.assertEquals(sum(dti.is_year_end), 1) + + # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, CBD requires np >= 1.7 + if not _np_version_under1p7: + bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu') + dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) + self.assertRaises(ValueError, lambda: dti.is_month_start) + + dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03']) + + self.assertEquals(dti.is_month_start[0], 1) + + tests = [ + (Timestamp('2013-06-01', offset='M').is_month_start, 1), + (Timestamp('2013-06-01', offset='BM').is_month_start, 0), + (Timestamp('2013-06-03', offset='M').is_month_start, 0), + (Timestamp('2013-06-03', offset='BM').is_month_start, 1), + (Timestamp('2013-02-28', offset='Q-FEB').is_month_end, 1), + (Timestamp('2013-02-28', offset='Q-FEB').is_quarter_end, 1), + (Timestamp('2013-02-28', offset='Q-FEB').is_year_end, 1), + (Timestamp('2013-03-01', offset='Q-FEB').is_month_start, 1), + (Timestamp('2013-03-01', offset='Q-FEB').is_quarter_start, 1), + (Timestamp('2013-03-01', offset='Q-FEB').is_year_start, 1), + (Timestamp('2013-03-31', offset='QS-FEB').is_month_end, 1), + (Timestamp('2013-03-31', offset='QS-FEB').is_quarter_end, 0), + (Timestamp('2013-03-31', offset='QS-FEB').is_year_end, 0), + (Timestamp('2013-02-01', offset='QS-FEB').is_month_start, 1), + (Timestamp('2013-02-01', offset='QS-FEB').is_quarter_start, 
1), + (Timestamp('2013-02-01', offset='QS-FEB').is_year_start, 1), + (Timestamp('2013-06-30', offset='BQ').is_month_end, 0), + (Timestamp('2013-06-30', offset='BQ').is_quarter_end, 0), + (Timestamp('2013-06-30', offset='BQ').is_year_end, 0), + (Timestamp('2013-06-28', offset='BQ').is_month_end, 1), + (Timestamp('2013-06-28', offset='BQ').is_quarter_end, 1), + (Timestamp('2013-06-28', offset='BQ').is_year_end, 0), + (Timestamp('2013-06-30', offset='BQS-APR').is_month_end, 0), + (Timestamp('2013-06-30', offset='BQS-APR').is_quarter_end, 0), + (Timestamp('2013-06-30', offset='BQS-APR').is_year_end, 0), + (Timestamp('2013-06-28', offset='BQS-APR').is_month_end, 1), + (Timestamp('2013-06-28', offset='BQS-APR').is_quarter_end, 1), + (Timestamp('2013-03-29', offset='BQS-APR').is_year_end, 1), + (Timestamp('2013-11-01', offset='AS-NOV').is_year_start, 1), + (Timestamp('2013-10-31', offset='AS-NOV').is_year_end, 1)] + + for ts, value in tests: + self.assertEquals(ts, value) + def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index e76a2d0cb6cf1..6d99d38049e5a 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1,7 +1,7 @@ # cython: profile=False cimport numpy as np -from numpy cimport (int32_t, int64_t, import_array, ndarray, +from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np @@ -303,6 +303,30 @@ class Timestamp(_Timestamp): def asm8(self): return np.int64(self.value).view('M8[ns]') + @property + def is_month_start(self): + return self._get_start_end_field('is_month_start') + + @property + def is_month_end(self): + return self._get_start_end_field('is_month_end') + + @property + def is_quarter_start(self): + return self._get_start_end_field('is_quarter_start') + + @property + def is_quarter_end(self): + return self._get_start_end_field('is_quarter_end') + + @property + def is_year_start(self): + return 
self._get_start_end_field('is_year_start') + + @property + def is_year_end(self): + return self._get_start_end_field('is_year_end') + def tz_localize(self, tz): """ Convert naive Timestamp to local time zone @@ -725,6 +749,12 @@ cdef class _Timestamp(datetime): out = get_date_field(np.array([self.value], dtype=np.int64), field) return out[0] + cpdef _get_start_end_field(self, field): + month_kw = self.freq.kwds.get('startingMonth', self.freq.kwds.get('month', 12)) if self.freq else 12 + freqstr = self.freqstr if self.freq else None + out = get_start_end_field(np.array([self.value], dtype=np.int64), field, freqstr, month_kw) + return out[0] + cdef PyTypeObject* ts_type = Timestamp @@ -2298,6 +2328,225 @@ def get_date_field(ndarray[int64_t] dtindex, object field): raise ValueError("Field %s not supported" % field) +@cython.wraparound(False) +def get_start_end_field(ndarray[int64_t] dtindex, object field, object freqstr=None, int month_kw=12): + ''' + Given an int64-based datetime index return array of indicators + of whether timestamps are at the start/end of the month/quarter/year + (defined by frequency). + ''' + cdef: + _TSObject ts + Py_ssize_t i + int count = 0 + bint is_business = 0 + int end_month = 12 + int start_month = 1 + ndarray[int8_t] out + ndarray[int32_t, ndim=2] _month_offset + bint isleap + pandas_datetimestruct dts + int mo_off, dom, doy, dow, ldom + + _month_offset = np.array( + [[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ], + [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 ]], + dtype=np.int32 ) + + count = len(dtindex) + out = np.zeros(count, dtype='int8') + + if freqstr: + if freqstr == 'C': + raise ValueError("Custom business days is not supported by %s" % field) + is_business = freqstr[0] == 'B' + + # YearBegin(), BYearBegin() use month = starting month of year + # QuarterBegin(), BQuarterBegin() use startingMonth = starting month of year + # other offsets use month, startingMonth as ending month of year. 
+ + if (freqstr[0:2] in ['MS', 'QS', 'AS']) or (freqstr[1:3] in ['MS', 'QS', 'AS']): + end_month = 12 if month_kw == 1 else month_kw - 1 + start_month = month_kw + else: + end_month = month_kw + start_month = (end_month % 12) + 1 + else: + end_month = 12 + start_month = 1 + + if field == 'is_month_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + ts = convert_to_tsobject(dtindex[i], None, None) + dom = dts.day + dow = ts_dayofweek(ts) + + if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_month_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + ts = convert_to_tsobject(dtindex[i], None, None) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = ts_dayofweek(ts) + + if (ldom == doy and dow < 5) or (dow == 4 and (ldom - doy <= 2)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ldom == doy: + out[i] = 1 + return out.view(bool) + + elif field == 'is_quarter_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + ts = 
convert_to_tsobject(dtindex[i], None, None) + dom = dts.day + dow = ts_dayofweek(ts) + + if ((dts.month - start_month) % 3 == 0) and ((dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if ((dts.month - start_month) % 3 == 0) and dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_quarter_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + ts = convert_to_tsobject(dtindex[i], None, None) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = ts_dayofweek(ts) + + if ((dts.month - end_month) % 3 == 0) and ((ldom == doy and dow < 5) or (dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ((dts.month - end_month) % 3 == 0) and (ldom == doy): + out[i] = 1 + return out.view(bool) + + elif field == 'is_year_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + ts = convert_to_tsobject(dtindex[i], None, None) + dom = dts.day + dow = ts_dayofweek(ts) + + if (dts.month == start_month) and ((dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + 
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + dom = dts.day + + if (dts.month == start_month) and dom == 1: + out[i] = 1 + return out.view(bool) + + elif field == 'is_year_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + ts = convert_to_tsobject(dtindex[i], None, None) + isleap = is_leapyear(dts.year) + dom = dts.day + mo_off = _month_offset[isleap, dts.month - 1] + doy = mo_off + dom + dow = ts_dayofweek(ts) + ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and ((ldom == doy and dow < 5) or (dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + return out.view(bool) + else: + for i in range(count): + if dtindex[i] == NPY_NAT: out[i] = -1; continue + + pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) + ts = convert_to_tsobject(dtindex[i], None, None) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and (ldom == doy): + out[i] = 1 + return out.view(bool) + + raise ValueError("Field %s not supported" % field) + + cdef inline int m8_weekday(int64_t val): ts = convert_to_tsobject(val, None, None) return ts_dayofweek(ts) diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py index 06ef99442b574..a3d4d4c7d40a5 100644 --- a/vb_suite/timeseries.py +++ b/vb_suite/timeseries.py @@ -303,3 +303,14 @@ def date_range(start=None, end=None, periods=None, freq=None): timeseries_custom_bmonthend_incr_n = \ Benchmark("date + 10 * cme",setup) + +#---------------------------------------------------------------------- +# month/quarter/year start/end accessors + +setup = common_setup + """ +N = 10000 +rng = date_range('1/1/1', periods=N, freq='B') +""" + +timeseries_is_month_start = Benchmark('rng.is_month_start', setup, + start_date=datetime(2014, 4, 1))