Skip to content

Commit ac97541

Browse files
committed
ENH: Allow DateOffset operations with Series
1 parent 58ae9db commit ac97541

File tree

10 files changed

+451
-37
lines changed

10 files changed

+451
-37
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,7 @@ Conversion
14441444

14451445
DatetimeIndex.to_datetime
14461446
DatetimeIndex.to_period
1447+
DatetimeIndex.to_perioddelta
14471448
DatetimeIndex.to_pydatetime
14481449
DatetimeIndex.to_series
14491450

doc/source/timedeltas.rst

+2
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ It will construct Series if the input is a Series, a scalar if the input is scal
9797
to_timedelta(np.arange(5),unit='s')
9898
to_timedelta(np.arange(5),unit='d')
9999
100+
.. _timedeltas.operations:
101+
100102
Operations
101103
----------
102104

doc/source/timeseries.rst

+40
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,46 @@ Another example is parameterizing ``YearEnd`` with the specific ending month:
647647
d + YearEnd()
648648
d + YearEnd(month=6)
649649
650+
651+
.. _timeseries.offsetseries:
652+
653+
Using offsets with ``Series`` / ``DatetimeIndex``
654+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
655+
656+
Offsets can be used with either a ``Series`` or ``DatetimeIndex`` to
657+
apply the offset to each element.
658+
659+
.. ipython:: python
660+
661+
rng = date_range('2012-01-01', '2012-01-03')
662+
s = Series(rng)
663+
rng
664+
rng + DateOffset(months=2)
665+
s + DateOffset(months=2)
666+
s - DateOffset(months=2)
667+
668+
If the offset class maps directly to a ``Timedelta`` (``Day``, ``Hour``,
669+
``Minute``, ``Second``, ``Micro``, ``Milli``, ``Nano``) it can be
670+
used exactly like a ``Timedelta`` - see the
671+
:ref:`Timedelta section<timedeltas.operations>` for more examples.
672+
673+
.. ipython:: python
674+
675+
s - Day(2)
676+
td = s - Series(date_range('2011-12-29', '2011-12-31'))
677+
td
678+
td + Minute(15)
679+
680+
Note that some offsets (such as ``BQuarterEnd``) do not have a
681+
vectorized implementation. They can still be used but may
682+
be significantly slower and will raise a ``PerformanceWarning``.
683+
684+
.. ipython:: python
685+
:okwarning:
686+
687+
rng + BQuarterEnd()
688+
689+
650690
.. _timeseries.alias:
651691

652692
Custom Business Days (Experimental)

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ Other enhancements
133133

134134
- ``to_datetime`` can now accept ``yearfirst`` keyword (:issue:`7599`)
135135

136+
- ``pandas.tseries.offsets`` larger than the ``Day`` offset can now be used with ``Series`` for addition/subtraction (:issue:`10699`). See the :ref:`Documentation <timeseries.offsetseries>` for more details.
137+
136138
- ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`)
137139

138140
- ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`).

pandas/core/ops.py

+63-24
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# necessary to enforce truediv in Python 2.X
77
from __future__ import division
88
import operator
9+
import warnings
910
import numpy as np
1011
import pandas as pd
1112
from pandas import compat, lib, tslib
@@ -21,7 +22,7 @@
2122
needs_i8_conversion, is_datetimelike_v_numeric,
2223
is_integer_dtype, is_categorical_dtype, is_object_dtype,
2324
is_timedelta64_dtype, is_datetime64_dtype, is_bool_dtype)
24-
25+
from pandas.io.common import PerformanceWarning
2526
# -----------------------------------------------------------------------------
2627
# Functions that add arithmetic methods to objects, given arithmetic factory
2728
# methods
@@ -276,12 +277,16 @@ def __init__(self, left, right, name):
276277

277278
self.left = left
278279
self.right = right
279-
lvalues = self._convert_to_array(left, name=name)
280-
rvalues = self._convert_to_array(right, name=name, other=lvalues)
281280

281+
self.is_offset_lhs = self._is_offset(left)
282+
self.is_offset_rhs = self._is_offset(right)
283+
284+
lvalues = self._convert_to_array(left, name=name)
282285
self.is_timedelta_lhs = is_timedelta64_dtype(left)
283286
self.is_datetime_lhs = is_datetime64_dtype(left)
284287
self.is_integer_lhs = left.dtype.kind in ['i', 'u']
288+
289+
rvalues = self._convert_to_array(right, name=name, other=lvalues)
285290
self.is_datetime_rhs = is_datetime64_dtype(rvalues)
286291
self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
287292
self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')
@@ -309,27 +314,32 @@ def _validate(self):
309314
" passed" % self.name)
310315

311316
# 2 timedeltas
312-
elif self.is_timedelta_lhs and self.is_timedelta_rhs:
317+
elif ((self.is_timedelta_lhs and
318+
(self.is_timedelta_rhs or self.is_offset_rhs)) or
319+
(self.is_timedelta_rhs and
320+
(self.is_timedelta_lhs or self.is_offset_lhs))):
313321

314322
if self.name not in ('__div__', '__truediv__', '__add__',
315323
'__sub__'):
316324
raise TypeError("can only operate on a timedeltas for "
317325
"addition, subtraction, and division, but the"
318326
" operator [%s] was passed" % self.name)
319327

320-
# datetime and timedelta
321-
elif self.is_datetime_lhs and self.is_timedelta_rhs:
328+
# datetime and timedelta/DateOffset
329+
elif (self.is_datetime_lhs and
330+
(self.is_timedelta_rhs or self.is_offset_rhs)):
322331

323332
if self.name not in ('__add__', '__sub__'):
324333
raise TypeError("can only operate on a datetime with a rhs of"
325-
" a timedelta for addition and subtraction, "
334+
" a timedelta/DateOffset for addition and subtraction,"
326335
" but the operator [%s] was passed" %
327336
self.name)
328337

329-
elif self.is_timedelta_lhs and self.is_datetime_rhs:
338+
elif ((self.is_timedelta_lhs or self.is_offset_lhs)
339+
and self.is_datetime_rhs):
330340

331341
if self.name != '__add__':
332-
raise TypeError("can only operate on a timedelta and"
342+
raise TypeError("can only operate on a timedelta/DateOffset and"
333343
" a datetime for addition, but the operator"
334344
" [%s] was passed" % self.name)
335345
else:
@@ -371,18 +381,7 @@ def _convert_to_array(self, values, name=None, other=None):
371381
elif name not in ('__truediv__', '__div__', '__mul__'):
372382
raise TypeError("incompatible type for a datetime/timedelta "
373383
"operation [{0}]".format(name))
374-
elif isinstance(values[0], pd.DateOffset):
375-
# handle DateOffsets
376-
os = np.array([getattr(v, 'delta', None) for v in values])
377-
mask = isnull(os)
378-
if mask.any():
379-
raise TypeError("cannot use a non-absolute DateOffset in "
380-
"datetime/timedelta operations [{0}]".format(
381-
', '.join([com.pprint_thing(v)
382-
for v in values[mask]])))
383-
values = to_timedelta(os, errors='coerce')
384384
elif inferred_type == 'floating':
385-
386385
# all nan, so ok, use the other dtype (e.g. timedelta or datetime)
387386
if isnull(values).all():
388387
values = np.empty(values.shape, dtype=other.dtype)
@@ -391,13 +390,16 @@ def _convert_to_array(self, values, name=None, other=None):
391390
raise TypeError(
392391
'incompatible type [{0}] for a datetime/timedelta '
393392
'operation'.format(np.array(values).dtype))
393+
elif self._is_offset(values):
394+
return values
394395
else:
395396
raise TypeError("incompatible type [{0}] for a datetime/timedelta"
396397
" operation".format(np.array(values).dtype))
397398

398399
return values
399400

400401
def _convert_for_datetime(self, lvalues, rvalues):
402+
from pandas.tseries.timedeltas import to_timedelta
401403
mask = None
402404
# datetimes require views
403405
if self.is_datetime_lhs or self.is_datetime_rhs:
@@ -407,13 +409,40 @@ def _convert_for_datetime(self, lvalues, rvalues):
407409
else:
408410
self.dtype = 'datetime64[ns]'
409411
mask = isnull(lvalues) | isnull(rvalues)
410-
lvalues = lvalues.view(np.int64)
411-
rvalues = rvalues.view(np.int64)
412+
413+
# if adding single offset try vectorized path
414+
# in DatetimeIndex; otherwise elementwise apply
415+
if self.is_offset_lhs:
416+
if len(lvalues) == 1:
417+
rvalues = pd.DatetimeIndex(rvalues)
418+
lvalues = lvalues[0]
419+
else:
420+
warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized",
421+
PerformanceWarning)
422+
rvalues = rvalues.astype('O')
423+
elif self.is_offset_rhs:
424+
if len(rvalues) == 1:
425+
lvalues = pd.DatetimeIndex(lvalues)
426+
rvalues = rvalues[0]
427+
else:
428+
warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized",
429+
PerformanceWarning)
430+
lvalues = lvalues.astype('O')
431+
else:
432+
lvalues = lvalues.view(np.int64)
433+
rvalues = rvalues.view(np.int64)
412434

413435
# otherwise it's a timedelta
414436
else:
415437
self.dtype = 'timedelta64[ns]'
416438
mask = isnull(lvalues) | isnull(rvalues)
439+
440+
# convert Tick DateOffset to underlying delta
441+
if self.is_offset_lhs:
442+
lvalues = to_timedelta(lvalues)
443+
if self.is_offset_rhs:
444+
rvalues = to_timedelta(rvalues)
445+
417446
lvalues = lvalues.astype(np.int64)
418447
rvalues = rvalues.astype(np.int64)
419448

@@ -439,6 +468,16 @@ def f(x):
439468
self.lvalues = lvalues
440469
self.rvalues = rvalues
441470

471+
472+
def _is_offset(self, arr_or_obj):
    """
    Check whether ``arr_or_obj`` is a DateOffset, or a non-empty
    list-like made up entirely of DateOffsets.

    Parameters
    ----------
    arr_or_obj : object
        A scalar or list-like to inspect.

    Returns
    -------
    bool
        True for a single ``pd.DateOffset`` or for a list-like holding at
        least one element in which every element is a ``pd.DateOffset``;
        False otherwise, including for an empty list-like.
    """
    if isinstance(arr_or_obj, pd.DateOffset):
        return True
    if not is_list_like(arr_or_obj):
        return False

    # ``all()`` over an empty iterable is vacuously True, which would
    # classify an empty datetime/timedelta operand as an array of offsets.
    # Require at least one element and stop at the first non-offset.
    seen_offset = False
    for element in arr_or_obj:
        if not isinstance(element, pd.DateOffset):
            return False
        seen_offset = True
    return seen_offset
480+
442481
@classmethod
443482
def maybe_convert_for_time_op(cls, left, right, name):
444483
"""
@@ -532,8 +571,8 @@ def wrapper(left, right, name=name):
532571
name=name, dtype=dtype)
533572
else:
534573
# scalars
535-
if hasattr(lvalues, 'values'):
536-
lvalues = lvalues.values
574+
if hasattr(lvalues, 'values') and not isinstance(lvalues, pd.DatetimeIndex):
575+
lvalues = lvalues.values
537576
return left._constructor(wrap_results(na_op(lvalues, rvalues)),
538577
index=left.index, name=left.name,
539578
dtype=dtype)

pandas/tests/test_series.py

+30-7
Original file line numberDiff line numberDiff line change
@@ -3286,14 +3286,37 @@ def test_timedeltas_with_DateOffset(self):
32863286
s + op(5)
32873287
op(5) + s
32883288

3289-
# invalid DateOffsets
3290-
for do in [ 'Week', 'BDay', 'BQuarterEnd', 'BMonthEnd', 'BYearEnd',
3291-
'BYearBegin','BQuarterBegin', 'BMonthBegin',
3292-
'MonthEnd','YearBegin', 'YearEnd',
3293-
'MonthBegin', 'QuarterBegin' ]:
3289+
3290+
def test_timedelta64_operations_with_DateOffset(self):
3291+
# GH 10699
3292+
td = Series([timedelta(minutes=5, seconds=3)] * 3)
3293+
result = td + pd.offsets.Minute(1)
3294+
expected = Series([timedelta(minutes=6, seconds=3)] * 3)
3295+
assert_series_equal(result, expected)
3296+
3297+
result = td - pd.offsets.Minute(1)
3298+
expected = Series([timedelta(minutes=4, seconds=3)] * 3)
3299+
assert_series_equal(result, expected)
3300+
3301+
result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3),
3302+
pd.offsets.Hour(2)])
3303+
expected = Series([timedelta(minutes=6, seconds=3),
3304+
timedelta(minutes=5, seconds=6),
3305+
timedelta(hours=2, minutes=5, seconds=3)])
3306+
assert_series_equal(result, expected)
3307+
3308+
result = td + pd.offsets.Minute(1) + pd.offsets.Second(12)
3309+
expected = Series([timedelta(minutes=6, seconds=15)] * 3)
3310+
assert_series_equal(result, expected)
3311+
3312+
# valid DateOffsets
3313+
for do in [ 'Hour', 'Minute', 'Second', 'Day', 'Micro',
3314+
'Milli', 'Nano' ]:
32943315
op = getattr(pd.offsets,do)
3295-
self.assertRaises(TypeError, s.__add__, op(5))
3296-
self.assertRaises(TypeError, s.__radd__, op(5))
3316+
td + op(5)
3317+
op(5) + td
3318+
td - op(5)
3319+
op(5) - td
32973320

32983321
def test_timedelta64_operations_with_timedeltas(self):
32993322

pandas/tseries/index.py

+32
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
# pylint: disable=E1101
22
import operator
3+
import warnings
34
from datetime import time, datetime
45
from datetime import timedelta
56
import numpy as np
67
from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE,
78
_values_from_object, _maybe_box,
89
ABCSeries, is_integer, is_float,
910
is_object_dtype, is_datetime64_dtype)
11+
from pandas.io.common import PerformanceWarning
1012
from pandas.core.index import Index, Int64Index, Float64Index
1113
import pandas.compat as compat
1214
from pandas.compat import u
@@ -16,6 +18,7 @@
1618
from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin
1719
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
1820
from pandas.tseries.tools import parse_time_string, normalize_date
21+
from pandas.tseries.timedeltas import to_timedelta
1922
from pandas.util.decorators import cache_readonly, deprecate_kwarg
2023
import pandas.core.common as com
2124
import pandas.tseries.offsets as offsets
@@ -672,15 +675,26 @@ def _add_delta(self, delta):
672675
new_values = self._add_delta_tdi(delta)
673676
# update name when delta is Index
674677
name = com._maybe_match_name(self, delta)
678+
elif isinstance(delta, DateOffset):
679+
new_values = self._add_offset(delta).asi8
675680
else:
676681
new_values = self.astype('O') + delta
682+
677683
tz = 'UTC' if self.tz is not None else None
678684
result = DatetimeIndex(new_values, tz=tz, name=name, freq='infer')
679685
utc = _utc()
680686
if self.tz is not None and self.tz is not utc:
681687
result = result.tz_convert(self.tz)
682688
return result
683689

690+
def _add_offset(self, offset):
    """
    Apply ``offset`` to this index.

    ``offset.apply_index(self)`` is tried first. If it raises
    ``NotImplementedError``, a ``PerformanceWarning`` is issued and the
    result of ``self.astype('O') + offset`` is returned instead.

    Parameters
    ----------
    offset : object providing ``apply_index``

    Returns
    -------
    The value produced by whichever of the two paths ran.
    """
    try:
        shifted = offset.apply_index(self)
    except NotImplementedError:
        warnings.warn("Non-vectorized DateOffset being applied to Series or DatetimeIndex",
                      PerformanceWarning)
        # fall back to object dtype and add the offset to that
        shifted = self.astype('O') + offset
    return shifted
697+
684698
def _format_native_types(self, na_rep=u('NaT'),
685699
date_format=None, **kwargs):
686700
from pandas.core.format import _get_format_datetime64_from_values
@@ -834,6 +848,24 @@ def union(self, other):
834848
result.offset = to_offset(result.inferred_freq)
835849
return result
836850

851+
def to_perioddelta(self, freq):
    """
    Calculate the TimedeltaIndex of differences between the index
    values and the index converted to a PeriodIndex at the specified
    freq (and back to timestamps). Used for vectorized offsets.

    .. versionadded:: 0.17.0

    Parameters
    ----------
    freq : Period frequency

    Returns
    -------
    y : TimedeltaIndex
    """
    # round-trip through periods, then subtract the integer (i8) values
    as_timestamps = self.to_period(freq).to_timestamp()
    return to_timedelta(self.asi8 - as_timestamps.asi8)
868+
837869
def union_many(self, others):
838870
"""
839871
A bit of a hack to accelerate unioning a collection of indexes

0 commit comments

Comments
 (0)