Allow DateOffset addition with Series #10744
@@ -6,6 +6,7 @@
 # necessary to enforce truediv in Python 2.X
 from __future__ import division
 import operator
+import warnings
 import numpy as np
 import pandas as pd
 from pandas import compat, lib, tslib
@@ -21,7 +22,7 @@
     needs_i8_conversion, is_datetimelike_v_numeric,
     is_integer_dtype, is_categorical_dtype, is_object_dtype,
     is_timedelta64_dtype, is_datetime64_dtype, is_bool_dtype)
-
+from pandas.io.common import PerformanceWarning
 # -----------------------------------------------------------------------------
 # Functions that add arithmetic methods to objects, given arithmetic factory
 # methods
@@ -276,12 +277,16 @@ def __init__(self, left, right, name):
         self.left = left
         self.right = right
-        lvalues = self._convert_to_array(left, name=name)
-        rvalues = self._convert_to_array(right, name=name, other=lvalues)
+
+        self.is_offset_lhs = self._is_offset(left)
+        self.is_offset_rhs = self._is_offset(right)

+        lvalues = self._convert_to_array(left, name=name)
         self.is_timedelta_lhs = is_timedelta64_dtype(left)
         self.is_datetime_lhs = is_datetime64_dtype(left)
         self.is_integer_lhs = left.dtype.kind in ['i', 'u']

+        rvalues = self._convert_to_array(right, name=name, other=lvalues)
         self.is_datetime_rhs = is_datetime64_dtype(rvalues)
         self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
         self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')
@@ -309,27 +314,32 @@ def _validate(self):
                                 " passed" % self.name)

         # 2 timedeltas
-        elif self.is_timedelta_lhs and self.is_timedelta_rhs:
+        elif ((self.is_timedelta_lhs and
+               (self.is_timedelta_rhs or self.is_offset_rhs)) or
+              (self.is_timedelta_rhs and
+               (self.is_timedelta_lhs or self.is_offset_lhs))):

             if self.name not in ('__div__', '__truediv__', '__add__',
                                  '__sub__'):
                 raise TypeError("can only operate on a timedeltas for "
                                 "addition, subtraction, and division, but the"
                                 " operator [%s] was passed" % self.name)

-        # datetime and timedelta
-        elif self.is_datetime_lhs and self.is_timedelta_rhs:
+        # datetime and timedelta/DateOffset
+        elif (self.is_datetime_lhs and
+              (self.is_timedelta_rhs or self.is_offset_rhs)):

             if self.name not in ('__add__', '__sub__'):
                 raise TypeError("can only operate on a datetime with a rhs of"
-                                " a timedelta for addition and subtraction, "
+                                " a timedelta/DateOffset for addition and subtraction,"
                                 " but the operator [%s] was passed" %
                                 self.name)

-        elif self.is_timedelta_lhs and self.is_datetime_rhs:
+        elif ((self.is_timedelta_lhs or self.is_offset_lhs)
+              and self.is_datetime_rhs):

             if self.name != '__add__':
-                raise TypeError("can only operate on a timedelta and"
+                raise TypeError("can only operate on a timedelta/DateOffset and"
                                 " a datetime for addition, but the operator"
                                 " [%s] was passed" % self.name)
         else:
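To make the effect of these checks concrete, here is a minimal sketch (the variable names are illustrative and the behavior is assumed for pandas 0.17.0, which this PR targets): addition and subtraction with a DateOffset are accepted for a datetime64 Series, while any other operator raises.

    import pandas as pd

    s = pd.Series(pd.date_range('2015-01-01', periods=3))

    s + pd.DateOffset(days=10)    # datetime lhs with DateOffset rhs: __add__ is allowed
    s - pd.DateOffset(days=10)    # __sub__ is allowed as well

    try:
        s * pd.DateOffset(days=10)    # not in the allowed operator set for this combination
    except TypeError as err:
        print(err)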
@@ -371,18 +381,7 @@ def _convert_to_array(self, values, name=None, other=None):
         elif name not in ('__truediv__', '__div__', '__mul__'):
             raise TypeError("incompatible type for a datetime/timedelta "
                             "operation [{0}]".format(name))
-        elif isinstance(values[0], pd.DateOffset):
-            # handle DateOffsets
-            os = np.array([getattr(v, 'delta', None) for v in values])
-            mask = isnull(os)
-            if mask.any():
-                raise TypeError("cannot use a non-absolute DateOffset in "
-                                "datetime/timedelta operations [{0}]".format(
-                                    ', '.join([com.pprint_thing(v)
-                                               for v in values[mask]])))
-            values = to_timedelta(os, errors='coerce')
         elif inferred_type == 'floating':
-
             # all nan, so ok, use the other dtype (e.g. timedelta or datetime)
             if isnull(values).all():
                 values = np.empty(values.shape, dtype=other.dtype)
@@ -391,13 +390,16 @@
                 raise TypeError(
                     'incompatible type [{0}] for a datetime/timedelta '
                     'operation'.format(np.array(values).dtype))
+        elif self._is_offset(values):
+            return values
         else:
             raise TypeError("incompatible type [{0}] for a datetime/timedelta"
                             " operation".format(np.array(values).dtype))

         return values

     def _convert_for_datetime(self, lvalues, rvalues):
+        from pandas.tseries.timedeltas import to_timedelta
         mask = None
         # datetimes require views
         if self.is_datetime_lhs or self.is_datetime_rhs:
@@ -407,13 +409,40 @@ def _convert_for_datetime(self, lvalues, rvalues):
             else:
                 self.dtype = 'datetime64[ns]'
             mask = isnull(lvalues) | isnull(rvalues)
-            lvalues = lvalues.view(np.int64)
-            rvalues = rvalues.view(np.int64)
+
+            # if adding single offset try vectorized path
+            # in DatetimeIndex; otherwise elementwise apply
+            if self.is_offset_lhs:
+                if len(lvalues) == 1:

Review comment (on the line above): in theory one could provide a series of offsets that are all the same (and equal to each other), but that is prob a special case.

+                    rvalues = pd.DatetimeIndex(rvalues)
+                    lvalues = lvalues[0]
+                else:
+                    warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized",
+                                  PerformanceWarning)
+                    rvalues = rvalues.astype('O')
+            elif self.is_offset_rhs:
+                if len(rvalues) == 1:
+                    lvalues = pd.DatetimeIndex(lvalues)
+                    rvalues = rvalues[0]
+                else:
+                    warnings.warn("Adding/subtracting array of DateOffsets to Series not vectorized",
+                                  PerformanceWarning)
+                    lvalues = lvalues.astype('O')
+            else:
+                lvalues = lvalues.view(np.int64)
+                rvalues = rvalues.view(np.int64)

         # otherwise it's a timedelta
         else:
             self.dtype = 'timedelta64[ns]'
             mask = isnull(lvalues) | isnull(rvalues)
+
+            # convert Tick DateOffset to underlying delta
+            if self.is_offset_lhs:
+                lvalues = to_timedelta(lvalues)
+            if self.is_offset_rhs:
+                rvalues = to_timedelta(rvalues)
+
             lvalues = lvalues.astype(np.int64)
             rvalues = rvalues.astype(np.int64)
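A sketch of the two paths above (the Series contents are illustrative; behavior assumed for pandas 0.17.0): a single offset is routed through the vectorized DatetimeIndex path, while an array of offsets is applied elementwise and triggers the PerformanceWarning from the warnings.warn calls.

    import pandas as pd

    s = pd.Series(pd.date_range('2015-01-31', periods=3, freq='M'))

    # a single DateOffset: wrapped into a DatetimeIndex and applied vectorized
    s + pd.DateOffset(months=1)

    # an array of DateOffsets: elementwise apply on object dtype, with a PerformanceWarning
    s + pd.Series([pd.offsets.MonthEnd(), pd.offsets.Day(2), pd.offsets.MonthBegin(2)])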
@@ -439,6 +468,16 @@ def f(x):
         self.lvalues = lvalues
         self.rvalues = rvalues

+    def _is_offset(self, arr_or_obj):
+        """ check if obj or all elements of list-like is DateOffset """
+        if isinstance(arr_or_obj, pd.DateOffset):
+            return True
+        elif is_list_like(arr_or_obj):
+            return all(isinstance(x, pd.DateOffset) for x in arr_or_obj)
+        else:
+            return False
+
     @classmethod
     def maybe_convert_for_time_op(cls, left, right, name):
         """
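For illustration only, a standalone restatement of the helper's check (the free function name here is hypothetical, not part of the PR):

    import pandas as pd
    from pandas.core.common import is_list_like

    def is_offset(arr_or_obj):
        # mirrors _is_offset above: a DateOffset, or a list-like containing only DateOffsets
        if isinstance(arr_or_obj, pd.DateOffset):
            return True
        elif is_list_like(arr_or_obj):
            return all(isinstance(x, pd.DateOffset) for x in arr_or_obj)
        return False

    is_offset(pd.DateOffset(months=1))                    # True
    is_offset([pd.offsets.Day(), pd.offsets.MonthEnd()])  # True
    is_offset([pd.offsets.Day(), 3])                      # False (mixed contents)
    is_offset(pd.Timedelta('1 day'))                      # False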
@@ -532,8 +571,8 @@ def wrapper(left, right, name=name):
                                      name=name, dtype=dtype)
         else:
             # scalars
-            if hasattr(lvalues, 'values'):
-                lvalues = lvalues.values
+            if hasattr(lvalues, 'values') and not isinstance(lvalues, pd.DatetimeIndex):
+                lvalues = lvalues.values
             return left._constructor(wrap_results(na_op(lvalues, rvalues)),
                                      index=left.index, name=left.name,
                                      dtype=dtype)
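Because the timedelta branch earlier in this diff converts Tick offsets to their underlying delta, absolute offsets should also combine with timedelta64 Series; a minimal sketch under the same assumptions (pandas 0.17.0, illustrative data):

    import pandas as pd

    td = pd.Series(pd.to_timedelta(['1 days', '2 days', '3 days']))

    td + pd.offsets.Hour(6)       # Tick offset converted via to_timedelta
    td - pd.offsets.Minute(30)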
@@ -1,12 +1,14 @@
 # pylint: disable=E1101
 import operator
+import warnings
 from datetime import time, datetime
 from datetime import timedelta
 import numpy as np
 from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE,
                                 _values_from_object, _maybe_box,
                                 ABCSeries, is_integer, is_float,
                                 is_object_dtype, is_datetime64_dtype)
+from pandas.io.common import PerformanceWarning
 from pandas.core.index import Index, Int64Index, Float64Index
 import pandas.compat as compat
 from pandas.compat import u
@@ -16,6 +18,7 @@
 from pandas.tseries.base import DatelikeOps, DatetimeIndexOpsMixin
 from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
 from pandas.tseries.tools import parse_time_string, normalize_date
+from pandas.tseries.timedeltas import to_timedelta
 from pandas.util.decorators import cache_readonly, deprecate_kwarg
 import pandas.core.common as com
 import pandas.tseries.offsets as offsets
@@ -672,15 +675,26 @@ def _add_delta(self, delta):
             new_values = self._add_delta_tdi(delta)
             # update name when delta is Index
             name = com._maybe_match_name(self, delta)
+        elif isinstance(delta, DateOffset):
+            new_values = self._add_offset(delta).asi8
         else:
             new_values = self.astype('O') + delta

         tz = 'UTC' if self.tz is not None else None
         result = DatetimeIndex(new_values, tz=tz, name=name, freq='infer')
         utc = _utc()
         if self.tz is not None and self.tz is not utc:
             result = result.tz_convert(self.tz)
         return result

+    def _add_offset(self, offset):
+        try:
+            return offset.apply_index(self)
+        except NotImplementedError:
+            warnings.warn("Non-vectorized DateOffset being applied to Series or DatetimeIndex",
+                          PerformanceWarning)
+            return self.astype('O') + offset
+
     def _format_native_types(self, na_rep=u('NaT'),
                              date_format=None, **kwargs):
         from pandas.core.format import _get_format_datetime64_from_values
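The new _add_offset prefers the offset's vectorized apply_index and only falls back to an object-dtype elementwise addition (with a PerformanceWarning) when apply_index is not implemented. A hedged sketch follows; which offsets actually implement apply_index depends on the offsets module, so CDay below is only assumed to lack it:

    import pandas as pd

    idx = pd.date_range('2015-01-15', periods=3)

    # an offset with apply_index (e.g. MonthEnd) takes the vectorized path
    idx + pd.offsets.MonthEnd()

    # an offset without apply_index falls back elementwise and warns
    idx + pd.offsets.CDay()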
@@ -834,6 +848,24 @@ def union(self, other):
                 result.offset = to_offset(result.inferred_freq)
         return result

+    def to_perioddelta(self, freq):

Review comment (on the line above): can you add this to the API.rst, also add a versionadded.

+        """
+        Calculates TimedeltaIndex of difference between index
+        values and index converted to PeriodIndex at specified
+        freq. Used for vectorized offsets
+
+        .. versionadded:: 0.17.0
+
+        Parameters
+        ----------
+        freq : Period frequency
+
+        Returns
+        -------
+        y : TimedeltaIndex
+        """
+        return to_timedelta(self.asi8 - self.to_period(freq).to_timestamp().asi8)
+
     def union_many(self, others):
         """
         A bit of a hack to accelerate unioning a collection of indexes
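A brief usage sketch of the to_perioddelta method added above (assuming pandas 0.17.0; the timestamps are illustrative): it measures how far each timestamp lies past the start of its period at the given frequency.

    import pandas as pd

    idx = pd.DatetimeIndex(['2015-01-15 10:00', '2015-02-20 12:30'])

    idx.to_perioddelta('M')
    # roughly equivalent to the difference against each period's start
    idx - idx.to_period('M').to_timestamp()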
Review comment: provide a link to this section as it's pretty similar: http://pandas.pydata.org/pandas-docs/stable/timedeltas.html#operations (feel free to adjust the language in either as needed).