diff --git a/doc/source/api.rst b/doc/source/api.rst index a1284a3ff7bc9..f0c79fc7d567f 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1444,6 +1444,7 @@ Conversion DatetimeIndex.to_datetime DatetimeIndex.to_period + DatetimeIndex.to_perioddelta DatetimeIndex.to_pydatetime DatetimeIndex.to_series diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 8215414e425fe..e62f4f9387526 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -97,6 +97,8 @@ It will construct Series if the input is a Series, a scalar if the input is scal to_timedelta(np.arange(5),unit='s') to_timedelta(np.arange(5),unit='d') +.. _timedeltas.operations: + Operations ---------- diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 1b5a4586e59e7..753c3ac57fb50 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -647,6 +647,46 @@ Another example is parameterizing ``YearEnd`` with the specific ending month: d + YearEnd() d + YearEnd(month=6) + +.. _timeseries.offsetseries: + +Using offsets with ``Series`` / ``DatetimeIndex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Offsets can be used with either a ``Series`` or ``DatetimeIndex`` to +apply the offset to each element. + +.. ipython:: python + + rng = date_range('2012-01-01', '2012-01-03') + s = Series(rng) + rng + rng + DateOffset(months=2) + s + DateOffset(months=2) + s - DateOffset(months=2) + +If the offset class maps directly to a ``Timedelta`` (``Day``, ``Hour``, +``Minute``, ``Second``, ``Micro``, ``Milli``, ``Nano``) it can be +used exactly like a ``Timedelta`` - see the +:ref:`Timedelta section <timedeltas.operations>` for more examples. + +.. ipython:: python + + s - Day(2) + td = s - Series(date_range('2011-12-29', '2011-12-31')) + td + td + Minute(15) + +Note that some offsets (such as ``BQuarterEnd``) do not have a +vectorized implementation. They can still be used but may +calculate significantly slower and will raise a ``PerformanceWarning`` + +.. 
ipython:: python + :okwarning: + + rng + BQuarterEnd() + + .. _timeseries.alias: Custom Business Days (Experimental) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 16c6c639a489e..68c23951ffbd1 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -133,6 +133,8 @@ Other enhancements - ``to_datetime`` can now accept ``yearfirst`` keyword (:issue:`7599`) +- ``pandas.tseries.offsets`` larger than the ``Day`` offset can now be used with ``Series`` for addition/subtraction (:issue:`10699`). See the :ref:`Documentation <timeseries.offsetseries>` for more details. + - ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`) - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6a278e0e44306..8e3dd3836855c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -6,6 +6,7 @@ # necessary to enforce truediv in Python 2.X from __future__ import division import operator +import warnings import numpy as np import pandas as pd from pandas import compat, lib, tslib @@ -21,7 +22,7 @@ needs_i8_conversion, is_datetimelike_v_numeric, is_integer_dtype, is_categorical_dtype, is_object_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_bool_dtype) - +from pandas.io.common import PerformanceWarning # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -276,12 +277,16 @@ def __init__(self, left, right, name): self.left = left self.right = right - lvalues = self._convert_to_array(left, name=name) - rvalues = self._convert_to_array(right, name=name, other=lvalues) + self.is_offset_lhs = self._is_offset(left) + self.is_offset_rhs = self._is_offset(right) + + lvalues = 
self._convert_to_array(left, name=name) self.is_timedelta_lhs = is_timedelta64_dtype(left) self.is_datetime_lhs = is_datetime64_dtype(left) self.is_integer_lhs = left.dtype.kind in ['i', 'u'] + + rvalues = self._convert_to_array(right, name=name, other=lvalues) self.is_datetime_rhs = is_datetime64_dtype(rvalues) self.is_timedelta_rhs = is_timedelta64_dtype(rvalues) self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u') @@ -309,7 +314,10 @@ def _validate(self): " passed" % self.name) # 2 timedeltas - elif self.is_timedelta_lhs and self.is_timedelta_rhs: + elif ((self.is_timedelta_lhs and + (self.is_timedelta_rhs or self.is_offset_rhs)) or + (self.is_timedelta_rhs and + (self.is_timedelta_lhs or self.is_offset_lhs))): if self.name not in ('__div__', '__truediv__', '__add__', '__sub__'): @@ -317,19 +325,21 @@ def _validate(self): "addition, subtraction, and division, but the" " operator [%s] was passed" % self.name) - # datetime and timedelta - elif self.is_datetime_lhs and self.is_timedelta_rhs: + # datetime and timedelta/DateOffset + elif (self.is_datetime_lhs and + (self.is_timedelta_rhs or self.is_offset_rhs)): if self.name not in ('__add__', '__sub__'): raise TypeError("can only operate on a datetime with a rhs of" - " a timedelta for addition and subtraction, " + " a timedelta/DateOffset for addition and subtraction," " but the operator [%s] was passed" % self.name) - elif self.is_timedelta_lhs and self.is_datetime_rhs: + elif ((self.is_timedelta_lhs or self.is_offset_lhs) + and self.is_datetime_rhs): if self.name != '__add__': - raise TypeError("can only operate on a timedelta and" + raise TypeError("can only operate on a timedelta/DateOffset and" " a datetime for addition, but the operator" " [%s] was passed" % self.name) else: @@ -371,18 +381,7 @@ def _convert_to_array(self, values, name=None, other=None): elif name not in ('__truediv__', '__div__', '__mul__'): raise TypeError("incompatible type for a datetime/timedelta " "operation [{0}]".format(name)) - 
elif isinstance(values[0], pd.DateOffset): - # handle DateOffsets - os = np.array([getattr(v, 'delta', None) for v in values]) - mask = isnull(os) - if mask.any(): - raise TypeError("cannot use a non-absolute DateOffset in " - "datetime/timedelta operations [{0}]".format( - ', '.join([com.pprint_thing(v) - for v in values[mask]]))) - values = to_timedelta(os, errors='coerce') elif inferred_type == 'floating': - # all nan, so ok, use the other dtype (e.g. timedelta or datetime) if isnull(values).all(): values = np.empty(values.shape, dtype=other.dtype) @@ -391,6 +390,8 @@ def _convert_to_array(self, values, name=None, other=None): raise TypeError( 'incompatible type [{0}] for a datetime/timedelta ' 'operation'.format(np.array(values).dtype)) + elif self._is_offset(values): + return values else: raise TypeError("incompatible type [{0}] for a datetime/timedelta" " operation".format(np.array(values).dtype)) @@ -398,6 +399,7 @@ def _convert_to_array(self, values, name=None, other=None): return values def _convert_for_datetime(self, lvalues, rvalues): + from pandas.tseries.timedeltas import to_timedelta mask = None # datetimes require views if self.is_datetime_lhs or self.is_datetime_rhs: @@ -407,13 +409,40 @@ def _convert_for_datetime(self, lvalues, rvalues): else: self.dtype = 'datetime64[ns]' mask = isnull(lvalues) | isnull(rvalues) - lvalues = lvalues.view(np.int64) - rvalues = rvalues.view(np.int64) + + # if adding single offset try vectorized path + # in DatetimeIndex; otherwise elementwise apply + if self.is_offset_lhs: + if len(lvalues) == 1: + rvalues = pd.DatetimeIndex(rvalues) + lvalues = lvalues[0] + else: + warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized", + PerformanceWarning) + rvalues = rvalues.astype('O') + elif self.is_offset_rhs: + if len(rvalues) == 1: + lvalues = pd.DatetimeIndex(lvalues) + rvalues = rvalues[0] + else: + warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized", + 
PerformanceWarning) + lvalues = lvalues.astype('O') + else: + lvalues = lvalues.view(np.int64) + rvalues = rvalues.view(np.int64) # otherwise it's a timedelta else: self.dtype = 'timedelta64[ns]' mask = isnull(lvalues) | isnull(rvalues) + + # convert Tick DateOffset to underlying delta + if self.is_offset_lhs: + lvalues = to_timedelta(lvalues) + if self.is_offset_rhs: + rvalues = to_timedelta(rvalues) + lvalues = lvalues.astype(np.int64) rvalues = rvalues.astype(np.int64) @@ -439,6 +468,16 @@ def f(x): self.lvalues = lvalues self.rvalues = rvalues + + def _is_offset(self, arr_or_obj): + """ check if obj or all elements of list-like is DateOffset """ + if isinstance(arr_or_obj, pd.DateOffset): + return True + elif is_list_like(arr_or_obj): + return all(isinstance(x, pd.DateOffset) for x in arr_or_obj) + else: + return False + @classmethod def maybe_convert_for_time_op(cls, left, right, name): """ @@ -532,8 +571,8 @@ def wrapper(left, right, name=name): name=name, dtype=dtype) else: # scalars - if hasattr(lvalues, 'values'): - lvalues = lvalues.values + if hasattr(lvalues, 'values') and not isinstance(lvalues, pd.DatetimeIndex): + lvalues = lvalues.values return left._constructor(wrap_results(na_op(lvalues, rvalues)), index=left.index, name=left.name, dtype=dtype) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 66a38cd858846..bd1b0ac8905b2 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -3286,14 +3286,37 @@ def test_timedeltas_with_DateOffset(self): s + op(5) op(5) + s - # invalid DateOffsets - for do in [ 'Week', 'BDay', 'BQuarterEnd', 'BMonthEnd', 'BYearEnd', - 'BYearBegin','BQuarterBegin', 'BMonthBegin', - 'MonthEnd','YearBegin', 'YearEnd', - 'MonthBegin', 'QuarterBegin' ]: + + def test_timedelta64_operations_with_DateOffset(self): + # GH 10699 + td = Series([timedelta(minutes=5, seconds=3)] * 3) + result = td + pd.offsets.Minute(1) + expected = Series([timedelta(minutes=6, seconds=3)] * 3) + 
assert_series_equal(result, expected) + + result = td - pd.offsets.Minute(1) + expected = Series([timedelta(minutes=4, seconds=3)] * 3) + assert_series_equal(result, expected) + + result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3), + pd.offsets.Hour(2)]) + expected = Series([timedelta(minutes=6, seconds=3), + timedelta(minutes=5, seconds=6), + timedelta(hours=2, minutes=5, seconds=3)]) + assert_series_equal(result, expected) + + result = td + pd.offsets.Minute(1) + pd.offsets.Second(12) + expected = Series([timedelta(minutes=6, seconds=15)] * 3) + assert_series_equal(result, expected) + + # valid DateOffsets + for do in [ 'Hour', 'Minute', 'Second', 'Day', 'Micro', + 'Milli', 'Nano' ]: op = getattr(pd.offsets,do) - self.assertRaises(TypeError, s.__add__, op(5)) - self.assertRaises(TypeError, s.__radd__, op(5)) + td + op(5) + op(5) + td + td - op(5) + op(5) - td def test_timedelta64_operations_with_timedeltas(self): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 8ee6a1bc64e4e..576656ad3ed9d 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1,5 +1,6 @@ # pylint: disable=E1101 import operator +import warnings from datetime import time, datetime from datetime import timedelta import numpy as np @@ -7,6 +8,7 @@ _values_from_object, _maybe_box, ABCSeries, is_integer, is_float, is_object_dtype, is_datetime64_dtype) +from pandas.io.common import PerformanceWarning from pandas.core.index import Index, Int64Index, Float64Index import pandas.compat as compat from pandas.compat import u @@ -16,6 +18,7 @@ from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date +from pandas.tseries.timedeltas import to_timedelta from pandas.util.decorators import cache_readonly, deprecate_kwarg import pandas.core.common as com import pandas.tseries.offsets as offsets @@ -672,8 +675,11 @@ 
def _add_delta(self, delta): new_values = self._add_delta_tdi(delta) # update name when delta is Index name = com._maybe_match_name(self, delta) + elif isinstance(delta, DateOffset): + new_values = self._add_offset(delta).asi8 else: new_values = self.astype('O') + delta + tz = 'UTC' if self.tz is not None else None result = DatetimeIndex(new_values, tz=tz, name=name, freq='infer') utc = _utc() @@ -681,6 +687,14 @@ def _add_delta(self, delta): result = result.tz_convert(self.tz) return result + def _add_offset(self, offset): + try: + return offset.apply_index(self) + except NotImplementedError: + warnings.warn("Non-vectorized DateOffset being applied to Series or DatetimeIndex", + PerformanceWarning) + return self.astype('O') + offset + def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs): from pandas.core.format import _get_format_datetime64_from_values @@ -834,6 +848,24 @@ def union(self, other): result.offset = to_offset(result.inferred_freq) return result + def to_perioddelta(self, freq): + """ + Calculates TimedeltaIndex of difference between index + values and index converted to PeriodIndex at specified + freq. Used for vectorized offsets + + .. 
versionadded:: 0.17.0 + + Parameters + ---------- + freq : Period frequency + + Returns + ------- + y : TimedeltaIndex + """ + return to_timedelta(self.asi8 - self.to_period(freq).to_timestamp().asi8) + def union_many(self, others): """ A bit of a hack to accelerate unioning a collection of indexes diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 941456fa07cfa..33faac153cce0 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -4,6 +4,8 @@ import numpy as np from pandas.tseries.tools import to_datetime +from pandas.tseries.timedeltas import to_timedelta +from pandas.core.common import ABCSeries, ABCDatetimeIndex # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday @@ -93,6 +95,15 @@ def wrapper(self, other): return wrapper +def apply_index_wraps(func): + @functools.wraps(func) + def wrapper(self, other): + result = func(self, other) + if self.normalize: + result = result.to_period('D').to_timestamp() + return result + return wrapper + def _is_normalized(dt): if (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0): @@ -221,6 +232,67 @@ def apply(self, other): else: return other + timedelta(self.n) + @apply_index_wraps + def apply_index(self, i): + """ + Vectorized apply of DateOffset to DatetimeIndex, + raises NotImplementedError for offsets without a + vectorized implementation + + .. 
versionadded:: 0.17.0 + + Parameters + ---------- + i : DatetimeIndex + + Returns + ------- + y : DatetimeIndex + """ + + if not type(self) is DateOffset: + raise NotImplementedError("DateOffset subclass %s " + "does not have a vectorized " + "implementation" + % (self.__class__.__name__,)) + relativedelta_fast = set(['years', 'months', 'weeks', + 'days', 'hours', 'minutes', + 'seconds', 'microseconds']) + # relativedelta/_offset path only valid for base DateOffset + if (self._use_relativedelta and + set(self.kwds).issubset(relativedelta_fast)): + months = ((self.kwds.get('years', 0) * 12 + + self.kwds.get('months', 0)) * self.n) + if months: + base = (i.to_period('M') + months).to_timestamp() + time = i.to_perioddelta('D') + days = i.to_perioddelta('M') - time + # minimum prevents month-end from wrapping + day_offset = np.minimum(days, + to_timedelta(base.days_in_month - 1, unit='D')) + i = base + day_offset + time + + weeks = (self.kwds.get('weeks', 0)) * self.n + if weeks: + i = (i.to_period('W') + weeks).to_timestamp() + i.to_perioddelta('W') + + timedelta_kwds = dict((k,v) for k,v in self.kwds.items() + if k in ['days','hours','minutes', + 'seconds','microseconds']) + if timedelta_kwds: + delta = Timedelta(**timedelta_kwds) + i = i + (self.n * delta) + return i + elif not self._use_relativedelta and hasattr(self, '_offset'): + # timedelta + return i + (self._offset * self.n) + else: + # relativedelta with other keywords + raise NotImplementedError("DateOffset with relativedelta " + "keyword(s) %s not able to be " + "applied vectorized" % + (set(self.kwds) - relativedelta_fast),) + def isAnchored(self): return (self.n == 1) @@ -307,6 +379,8 @@ def __call__(self, other): return self.apply(other) def __add__(self, other): + if isinstance(other, (ABCDatetimeIndex, ABCSeries)): + return other + self try: return self.apply(other) except ApplyTypeError: @@ -324,6 +398,8 @@ def __sub__(self, other): return NotImplemented def __rsub__(self, other): + if 
isinstance(other, (ABCDatetimeIndex, ABCSeries)): + return other - self return self.__class__(-self.n, normalize=self.normalize, **self.kwds) + other def __mul__(self, someInt): @@ -363,6 +439,37 @@ def onOffset(self, dt): b = ((dt + self) - self) return a == b + # helpers for vectorized offsets + def _beg_apply_index(self, i, freq): + """Offsets index to beginning of Period frequency""" + + off = i.to_perioddelta('D') + base_period = i.to_period(freq) + if self.n < 0: + # when subtracting, dates on start roll to prior + roll = np.where(base_period.to_timestamp() == i - off, + self.n, self.n + 1) + else: + roll = self.n + + base = (base_period + roll).to_timestamp() + return base + off + + def _end_apply_index(self, i, freq): + """Offsets index to end of Period frequency""" + + off = i.to_perioddelta('D') + base_period = i.to_period(freq) + if self.n > 0: + # when adding, dates on end roll to next + roll = np.where(base_period.to_timestamp(how='end') == i - off, + self.n, self.n - 1) + else: + roll = self.n + + base = (base_period + roll).to_timestamp(how='end') + return base + off + # way to get around weirdness with rule_code @property def _prefix(self): @@ -529,6 +636,19 @@ def apply(self, other): raise ApplyTypeError('Only know how to combine business day with ' 'datetime or timedelta.') + @apply_index_wraps + def apply_index(self, i): + time = i.to_perioddelta('D') + # to_period rolls forward to next BDay; track and + # reduce n where it does when rolling forward + shifted = (i.to_perioddelta('B') - time).asi8 != 0 + if self.n > 0: + roll = np.where(shifted, self.n - 1, self.n) + else: + roll = self.n + + return (i.to_period('B') + roll).to_timestamp() + time + def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False @@ -902,6 +1022,9 @@ def apply(self, other): raise ApplyTypeError('Only know how to combine trading day with ' 'datetime, datetime64 or timedelta.') + def apply_index(self, i): + raise NotImplementedError + @staticmethod 
def _to_dt64(dt, dtype='datetime64'): # Currently @@ -949,6 +1072,10 @@ def apply(self, other): other = other + relativedelta(months=n, day=31) return other + @apply_index_wraps + def apply_index(self, i): + return self._end_apply_index(i, 'M') + def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False @@ -970,6 +1097,10 @@ def apply(self, other): return other + relativedelta(months=n, day=1) + @apply_index_wraps + def apply_index(self, i): + return self._beg_apply_index(i, 'M') + def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False @@ -1211,6 +1342,13 @@ def apply(self, other): base.hour, base.minute, base.second, base.microsecond) return other + @apply_index_wraps + def apply_index(self, i): + if self.weekday is None: + return (i.to_period('W') + self.n).to_timestamp() + i.to_perioddelta('W') + else: + return self._end_apply_index(i, self.freqstr) + def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False @@ -1587,6 +1725,10 @@ def apply(self, other): other = other + relativedelta(months=monthsToGo + 3 * n, day=31) return other + @apply_index_wraps + def apply_index(self, i): + return self._end_apply_index(i, self.freqstr) + def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False @@ -1621,6 +1763,11 @@ def apply(self, other): other = other + relativedelta(months=3 * n - monthsSince, day=1) return other + @apply_index_wraps + def apply_index(self, i): + freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1 + freqstr = 'Q-%s' % (_int_to_month[freq_month],) + return self._beg_apply_index(i, freqstr) class YearOffset(DateOffset): """DateOffset that just needs a month""" @@ -1764,6 +1911,11 @@ def _rollf(date): result = _rollf(result) return result + @apply_index_wraps + def apply_index(self, i): + # convert month anchor to annual period tuple + return self._end_apply_index(i, self.freqstr) + def onOffset(self, dt): if self.normalize and not 
_is_normalized(dt): return False @@ -1809,6 +1961,12 @@ def _rollf(date): result = _rollf(result) return result + @apply_index_wraps + def apply_index(self, i): + freq_month = 12 if self.month == 1 else self.month - 1 + freqstr = 'A-%s' % (_int_to_month[freq_month],) + return self._beg_apply_index(i, freqstr) + def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False @@ -2311,6 +2469,7 @@ def apply(self, other): _prefix = 'undefined' + def isAnchored(self): return False diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index f14358452ec13..1b14625310fd3 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -31,6 +31,7 @@ from pandas.compat import range, long, StringIO, lrange, lmap, zip, product from numpy.random import rand from pandas.util.testing import assert_frame_equal +from pandas.io.common import PerformanceWarning import pandas.compat as compat import pandas.core.common as com from pandas import concat @@ -2454,6 +2455,91 @@ def test_intersection_bug_1708(self): result = index_1 & index_2 self.assertEqual(len(result), 0) + # GH 10699 + def test_datetime64_with_DateOffset(self): + for klass, assert_func in zip([Series, DatetimeIndex], + [self.assert_series_equal, + tm.assert_index_equal]): + s = klass(date_range('2000-01-01', '2000-01-31')) + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = klass(date_range('2001-01-01', '2001-01-31')) + assert_func(result, exp) + assert_func(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = klass(date_range('1999-01-01', '1999-01-31')) + assert_func(result, exp) + + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + pd.Timestamp('2000-02-15', tz='US/Central')]) + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')]) + 
assert_func(result, exp) + assert_func(result2, exp) + + # array of offsets - valid for Series only + if klass is Series: + with tm.assert_produces_warning(PerformanceWarning): + s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.MonthEnd()]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29')]) + assert_func(result, exp) + + # same offset + result = s + Series([pd.offsets.DateOffset(years=1), + pd.offsets.DateOffset(years=1)]) + exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) + assert_func(result, exp) + + s = klass([Timestamp('2000-01-05 00:15:00'), Timestamp('2000-01-31 00:23:00'), + Timestamp('2000-01-01'), Timestamp('2000-02-29'), Timestamp('2000-12-31')]) + + #DateOffset relativedelta fastpath + relative_kwargs = [('years', 2), ('months', 5), ('days', 3), + ('hours', 5), ('minutes', 10), ('seconds', 2), + ('microseconds', 5)] + for i, kwd in enumerate(relative_kwargs): + op = pd.DateOffset(**dict([kwd])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + op = pd.DateOffset(**dict(relative_kwargs[:i+1])) + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + + + # split by fast/slow path to test perf warning + off = {False: + ['YearBegin', ('YearBegin', {'month': 5}), + 'YearEnd', ('YearEnd', {'month': 5}), + 'MonthBegin', 'MonthEnd', 'Week', ('Week', {'weekday': 3}), + 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin'], + PerformanceWarning: + ['CustomBusinessDay', 'CDay', 'CBMonthEnd','CBMonthBegin', + 'BMonthBegin', 'BMonthEnd', 'BusinessHour', 'BYearBegin', + 'BYearEnd','BQuarterBegin', ('LastWeekOfMonth', {'weekday':2}), + ('FY5253Quarter', {'qtr_with_extra_week': 1, 'startingMonth': 1, + 'weekday': 2, 'variation': 'nearest'}), + ('FY5253',{'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}), + ('WeekOfMonth', {'weekday': 2, 'week': 2}), 'Easter', + 
('DateOffset', {'day': 4}), ('DateOffset', {'month': 5})]} + + for normalize in (True, False): + for warning, offsets in off.items(): + for do in offsets: + if isinstance(do, tuple): + do, kwargs = do + else: + do = do + kwargs = {} + op = getattr(pd.offsets,do)(5, normalize=normalize, **kwargs) + with tm.assert_produces_warning(warning): + assert_func(klass([x + op for x in s]), s + op) + assert_func(klass([x - op for x in s]), s - op) + assert_func(klass([op + x for x in s]), op + s) # def test_add_timedelta64(self): # rng = date_range('1/1/2000', periods=5) # delta = rng.values[3] - rng.values[1] @@ -4222,12 +4308,12 @@ def test_to_datetime_format_microsecond(self): def test_to_datetime_format_time(self): data = [ - ['01/10/2010 15:20', '%m/%d/%Y %H:%M', Timestamp('2010-01-10 15:20')], - ['01/10/2010 05:43', '%m/%d/%Y %I:%M', Timestamp('2010-01-10 05:43')], - ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S', Timestamp('2010-01-10 13:56:01')]#, - #['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', Timestamp('2010-01-10 20:14')], - #['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', Timestamp('2010-01-10 07:40')], - #['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', Timestamp('2010-01-10 09:12:56')] + ['01/10/2010 15:20', '%m/%d/%Y %H:%M', Timestamp('2010-01-10 15:20')], + ['01/10/2010 05:43', '%m/%d/%Y %I:%M', Timestamp('2010-01-10 05:43')], + ['01/10/2010 13:56:01', '%m/%d/%Y %H:%M:%S', Timestamp('2010-01-10 13:56:01')]#, + #['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', Timestamp('2010-01-10 20:14')], + #['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', Timestamp('2010-01-10 07:40')], + #['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', Timestamp('2010-01-10 09:12:56')] ] for s, format, dt in data: self.assertEqual(to_datetime(s, format=format), dt) diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py index 75147e079bb65..151777add104a 100644 --- a/vb_suite/timeseries.py +++ b/vb_suite/timeseries.py @@ -405,3 +405,33 @@ def iter_n(iterable, n=None): 
timeseries_iter_datetimeindex_preexit = Benchmark('iter_n(idx1, M)', setup) timeseries_iter_periodindex_preexit = Benchmark('iter_n(idx2, M)', setup) + + +#---------------------------------------------------------------------- +# apply an Offset to a DatetimeIndex +setup = common_setup + """ +N = 100000 +idx1 = date_range(start='20140101', freq='T', periods=N) +delta_offset = Day() +fast_offset = DateOffset(months=2, days=2) +slow_offset = offsets.BusinessDay() + +""" + +timeseries_datetimeindex_offset_delta = Benchmark('idx1 + delta_offset', setup) +timeseries_datetimeindex_offset_fast = Benchmark('idx1 + fast_offset', setup) +timeseries_datetimeindex_offset_slow = Benchmark('idx1 + slow_offset', setup) + +# apply an Offset to a Series containing datetime64 values +setup = common_setup + """ +N = 100000 +s = Series(date_range(start='20140101', freq='T', periods=N)) +delta_offset = Day() +fast_offset = DateOffset(months=2, days=2) +slow_offset = offsets.BusinessDay() + +""" + +timeseries_series_offset_delta = Benchmark('s + delta_offset', setup) +timeseries_series_offset_fast = Benchmark('s + fast_offset', setup) +timeseries_series_offset_slow = Benchmark('s + slow_offset', setup)