Skip to content

Commit 28f479e

Browse files
committed
Move time operations for Series into class _TimeOp
* clarify/simplify time arithmetic-specific code into a separate class * cleanup extraneous `set` and containment checks in branches * refactor validation checks to separate function * combine branches and dry out repetitive code * move convert_to_array out of arith method * self --> left; other --> right [possibly clearer for future refactor] * change handling of DatetimeIndex (no longer need additional return) * add np1.6 check for integer --> timedelta conversion under the hood
1 parent e3c71f2 commit 28f479e

File tree

2 files changed

+202
-156
lines changed

2 files changed

+202
-156
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
200200
with the same signature for ``Panel``
201201
- Series (for index) / Panel (for items) now allow attribute access to their elements (:issue:`1903`)
202202
- Refactor of ``_get_numeric_data/_get_bool_data`` to core/generic.py, allowing Series/Panel functionality
203+
- Refactor of Series arithmetic with time-like objects (datetime/timedelta/time
204+
etc.) into a separate, cleaned up wrapper class. (:issue:`4613`)
203205

204206
**Experimental Features**
205207

pandas/core/series.py

+200-156
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,178 @@
5959

6060
_SHOW_WARNINGS = True
6161

62+
class _TimeOp(object):
    """
    Wrapper around Series datetime/time/timedelta arithmetic operations.

    Generally, you should use classmethod ``maybe_convert_for_time_op`` as an
    entry point.

    After a successful construction the instance carries everything the
    arithmetic wrapper needs:

    * ``lvalues`` / ``rvalues`` -- the converted operands (int64 views/casts,
      or float64 for timedelta / timedelta division)
    * ``dtype`` -- the dtype to give the result of the operation
    * ``wrap_results`` -- callable applied to the raw result; when either
      operand held nulls it re-inserts ``fill_value`` at those positions
    """
    # value written into masked (null) positions of the result
    fill_value = tslib.iNaT
    # identity by default; replaced per-instance when masking is required
    wrap_results = staticmethod(lambda x: x)
    dtype = None

    def __init__(self, left, right, name):
        """
        Convert, classify and validate both operands for operation ``name``.

        Parameters
        ----------
        left : object with a ``dtype`` attribute (the lhs of the operation)
        right : the rhs of the operation (scalar or list-like)
        name : name of the operator method, e.g. ``'__add__'``

        Raises
        ------
        TypeError
            if the operand types are not compatible with ``name``
        """
        self.name = name

        lvalues = self._convert_to_array(left, name=name)
        rvalues = self._convert_to_array(right, name=name)

        # lhs flags are taken from the unconverted ``left``; rhs flags from
        # the converted ``rvalues``
        self.is_timedelta_lhs = com.is_timedelta64_dtype(left)
        self.is_datetime_lhs = com.is_datetime64_dtype(left)
        self.is_integer_lhs = left.dtype.kind in ('i', 'u')
        self.is_datetime_rhs = com.is_datetime64_dtype(rvalues)
        # under numpy < 1.7 any non-datetime rhs is treated as a timedelta
        self.is_timedelta_rhs = (com.is_timedelta64_dtype(rvalues) or
                                 (not self.is_datetime_rhs and
                                  _np_version_under1p7))
        self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')

        self._validate()

        self._convert_for_datetime(lvalues, rvalues)

    def _validate(self):
        """
        Check that ``self.name`` is permitted for the operand combination
        described by the ``is_*_lhs`` / ``is_*_rhs`` flags.

        Raises
        ------
        TypeError
            if the operator is not allowed for this combination, or if the
            combination itself is not supported
        """
        # timedelta and integer mul/div
        if ((self.is_timedelta_lhs and self.is_integer_rhs) or
                (self.is_integer_lhs and self.is_timedelta_rhs)):

            if self.name not in ('__truediv__', '__div__', '__mul__'):
                # multiplication is accepted as well, so name it in the message
                raise TypeError("can only operate on a timedelta and an integer for "
                                "multiplication and division, but the operator [%s] "
                                "was passed" % self.name)

        # 2 datetimes
        elif self.is_datetime_lhs and self.is_datetime_rhs:
            if self.name != '__sub__':
                raise TypeError("can only operate on a datetimes for subtraction, "
                                "but the operator [%s] was passed" % self.name)

        # 2 timedeltas
        elif self.is_timedelta_lhs and self.is_timedelta_rhs:

            if self.name not in ('__div__', '__truediv__', '__add__', '__sub__'):
                raise TypeError("can only operate on a timedeltas for "
                                "addition, subtraction, and division, but the operator [%s] was passed" % self.name)

        # datetime and timedelta
        elif self.is_datetime_lhs and self.is_timedelta_rhs:

            if self.name not in ('__add__', '__sub__'):
                raise TypeError("can only operate on a datetime with a rhs of a timedelta for "
                                "addition and subtraction, but the operator [%s] was passed" % self.name)

        # timedelta and datetime
        elif self.is_timedelta_lhs and self.is_datetime_rhs:

            if self.name != '__add__':
                raise TypeError("can only operate on a timedelta and a datetime for "
                                "addition, but the operator [%s] was passed" % self.name)
        else:
            raise TypeError('cannot operate on a series with out a rhs '
                            'of a series/ndarray of type datetime64[ns] '
                            'or a timedelta')

    def _convert_to_array(self, values, name=None):
        """
        Convert ``values`` to an array-like suitable for time arithmetic.

        Scalars are first wrapped in a 1-element ndarray; the remaining
        handling is chosen from ``lib.infer_dtype(values)``.

        Parameters
        ----------
        values : scalar or list-like operand
        name : name of the operator method; only consulted for integer input

        Raises
        ------
        TypeError
            for integer input with an operator other than mul/div, for
            DateOffsets without a ``delta``, and for any other unsupported
            type
        """
        coerce = 'compat' if _np_version_under1p7 else True
        if not is_list_like(values):
            values = np.array([values])
        inferred_type = lib.infer_dtype(values)
        if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
            # a datelike
            if not (isinstance(values, (pa.Array, Series)) and
                    com.is_datetime64_dtype(values)):
                values = tslib.array_to_datetime(values)
            elif isinstance(values, DatetimeIndex):
                values = values.to_series()
        elif inferred_type in ('timedelta', 'timedelta64'):
            # have a timedelta, convert to ns here
            values = com._possibly_cast_to_timedelta(values, coerce=coerce)
        elif inferred_type == 'integer':
            # py3 compat where dtype is 'm' but is an integer
            if values.dtype.kind == 'm':
                values = values.astype('timedelta64[ns]')
            elif isinstance(values, PeriodIndex):
                values = values.to_timestamp().to_series()
            elif name not in ('__truediv__', '__div__', '__mul__'):
                raise TypeError("incompatible type for a datetime/timedelta "
                                "operation [{0}]".format(name))
        elif isinstance(values[0], DateOffset):
            # handle DateOffsets: only those exposing a ``delta`` are usable
            os = pa.array([getattr(v, 'delta', None) for v in values])
            mask = isnull(os)
            if mask.any():
                raise TypeError("cannot use a non-absolute DateOffset in "
                                "datetime/timedelta operations [{0}]".format(
                                    ','.join([com.pprint_thing(v) for v in values[mask]])))
            values = com._possibly_cast_to_timedelta(os, coerce=coerce)
        else:
            raise TypeError("incompatible type [{0}] for a datetime/timedelta operation".format(pa.array(values).dtype))

        return values

    def _convert_for_datetime(self, lvalues, rvalues):
        """
        Decide the result dtype, convert both operands to plain numbers and
        store them on the instance as ``self.lvalues`` / ``self.rvalues``.

        If either operand contains nulls, ``self.wrap_results`` is replaced
        by a function that casts the result to ``self.dtype`` and writes
        ``self.fill_value`` into the null positions.
        """
        # nulls must be located before the operands are turned into integers
        mask = isnull(lvalues) | isnull(rvalues)

        # datetimes require views
        if self.is_datetime_lhs or self.is_datetime_rhs:
            # datetime subtraction means timedelta
            if self.is_datetime_lhs and self.is_datetime_rhs:
                self.dtype = 'timedelta64[ns]'
            else:
                self.dtype = 'datetime64[ns]'
            lvalues = lvalues.view(np.int64)
            rvalues = rvalues.view(np.int64)

        # otherwise it's a timedelta
        else:
            self.dtype = 'timedelta64[ns]'
            lvalues = lvalues.astype(np.int64)
            rvalues = rvalues.astype(np.int64)

            # time delta division -> unit less
            # integer gets converted to timedelta in np < 1.6
            if (self.is_timedelta_lhs and self.is_timedelta_rhs and
                    not self.is_integer_rhs and
                    not self.is_integer_lhs and
                    self.name in ('__div__', '__truediv__')):
                self.dtype = 'float64'
                self.fill_value = np.nan
                lvalues = lvalues.astype(np.float64)
                rvalues = rvalues.astype(np.float64)

        # if we need to mask the results
        if mask.any():
            def _mask_results(x):
                # dtype / fill_value are read when the result is wrapped
                x = pa.array(x, dtype=self.dtype)
                np.putmask(x, mask, self.fill_value)
                return x
            self.wrap_results = _mask_results

        self.lvalues = lvalues
        self.rvalues = rvalues

    @classmethod
    def maybe_convert_for_time_op(cls, left, right, name):
        """
        if ``left`` and ``right`` are appropriate for datetime arithmetic with
        operation ``name``, processes them and returns a ``_TimeOp`` object
        that stores all the required values.  Otherwise, it will return
        either ``NotImplemented`` or ``None``: ``NotImplemented`` indicates
        that the operation is unsupported for datetimes (e.g., an unsupported
        r_op), ``None`` that the data is not the right type for time ops.

        Raises
        ------
        TypeError
            propagated from the constructor when the operands are time-like
            but incompatible with ``name``
        """
        # decide if we can do it
        is_timedelta_lhs = com.is_timedelta64_dtype(left)
        is_datetime_lhs = com.is_datetime64_dtype(left)
        if not (is_datetime_lhs or is_timedelta_lhs):
            return None
        # rops currently disabled
        if name.startswith('__r'):
            return NotImplemented

        return cls(left, right, name)
230+
62231
#----------------------------------------------------------------------
63232
# Wrapper function for Series arithmetic methods
64233

65-
66234
def _arith_method(op, name, fill_zeros=None):
67235
"""
68236
Wrapper function for Series arithmetic operations, to avoid
@@ -87,178 +255,54 @@ def na_op(x, y):
87255

88256
return result
89257

90-
def wrapper(self, other, name=name):
258+
def wrapper(left, right, name=name):
91259
from pandas.core.frame import DataFrame
92-
dtype = None
93-
fill_value = tslib.iNaT
94-
wrap_results = lambda x: x
95-
96-
lvalues, rvalues = self, other
97-
98-
is_timedelta_lhs = com.is_timedelta64_dtype(self)
99-
is_datetime_lhs = com.is_datetime64_dtype(self)
100-
is_integer_lhs = lvalues.dtype.kind in ['i','u']
101-
102-
if is_datetime_lhs or is_timedelta_lhs:
103-
104-
coerce = 'compat' if _np_version_under1p7 else True
105-
106-
# convert the argument to an ndarray
107-
def convert_to_array(values, other=None):
108-
if not is_list_like(values):
109-
values = np.array([values])
110-
inferred_type = lib.infer_dtype(values)
111-
if inferred_type in set(['datetime64','datetime','date','time']):
112-
# a datetlike
113-
if not (isinstance(values, (pa.Array, Series)) and com.is_datetime64_dtype(values)):
114-
values = tslib.array_to_datetime(values)
115-
elif isinstance(values, DatetimeIndex):
116-
other = values = values.to_series()
117-
elif inferred_type in set(['timedelta']):
118-
# have a timedelta, convert to to ns here
119-
values = com._possibly_cast_to_timedelta(values, coerce=coerce)
120-
elif inferred_type in set(['timedelta64']):
121-
# have a timedelta64, make sure dtype dtype is ns
122-
values = com._possibly_cast_to_timedelta(values, coerce=coerce)
123-
elif inferred_type in set(['integer']):
124-
# py3 compat where dtype is 'm' but is an integer
125-
if values.dtype.kind == 'm':
126-
values = values.astype('timedelta64[ns]')
127-
elif isinstance(values, PeriodIndex):
128-
other = values = values.to_timestamp().to_series()
129-
elif name not in ['__truediv__','__div__','__mul__']:
130-
raise TypeError("incompatible type for a datetime/timedelta operation [{0}]".format(name))
131-
elif isinstance(values[0],DateOffset):
132-
# handle DateOffsets
133-
os = pa.array([ getattr(v,'delta',None) for v in values ])
134-
mask = isnull(os)
135-
if mask.any():
136-
raise TypeError("cannot use a non-absolute DateOffset in "
137-
"datetime/timedelta operations [{0}]".format(','.join([ com.pprint_thing(v) for v in values[mask] ])))
138-
values = com._possibly_cast_to_timedelta(os, coerce=coerce)
139-
else:
140-
raise TypeError("incompatible type [{0}] for a datetime/timedelta operation".format(pa.array(values).dtype))
141-
142-
return values, other
143-
144-
# convert lhs and rhs
145-
lvalues,_ = convert_to_array(lvalues)
146-
rvalues,other = convert_to_array(rvalues,other)
147-
148-
is_datetime_rhs = com.is_datetime64_dtype(rvalues)
149-
is_timedelta_rhs = com.is_timedelta64_dtype(rvalues) or (not is_datetime_rhs and _np_version_under1p7)
150-
is_integer_rhs = rvalues.dtype.kind in ['i','u']
151-
mask = None
152-
153-
# timedelta and integer mul/div
154-
if (is_timedelta_lhs and is_integer_rhs) or (is_integer_lhs and is_timedelta_rhs):
155-
156-
if name not in ['__truediv__','__div__','__mul__']:
157-
raise TypeError("can only operate on a timedelta and an integer for "
158-
"division, but the operator [%s] was passed" % name)
159-
dtype = 'timedelta64[ns]'
160-
mask = isnull(lvalues) | isnull(rvalues)
161-
lvalues = lvalues.astype(np.int64)
162-
rvalues = rvalues.astype(np.int64)
163-
164-
# 2 datetimes
165-
elif is_datetime_lhs and is_datetime_rhs:
166-
if name != '__sub__':
167-
raise TypeError("can only operate on a datetimes for subtraction, "
168-
"but the operator [%s] was passed" % name)
169-
170-
dtype = 'timedelta64[ns]'
171-
mask = isnull(lvalues) | isnull(rvalues)
172-
lvalues = lvalues.view('i8')
173-
rvalues = rvalues.view('i8')
174-
175-
# 2 timedeltas
176-
elif is_timedelta_lhs and is_timedelta_rhs:
177-
mask = isnull(lvalues) | isnull(rvalues)
178-
179-
# time delta division -> unit less
180-
if name in ['__div__','__truediv__']:
181-
dtype = 'float64'
182-
fill_value = np.nan
183-
lvalues = lvalues.astype(np.int64).astype(np.float64)
184-
rvalues = rvalues.astype(np.int64).astype(np.float64)
185-
186-
# another timedelta
187-
elif name in ['__add__','__sub__']:
188-
dtype = 'timedelta64[ns]'
189-
lvalues = lvalues.astype(np.int64)
190-
rvalues = rvalues.astype(np.int64)
191-
192-
else:
193-
raise TypeError("can only operate on a timedeltas for "
194-
"addition, subtraction, and division, but the operator [%s] was passed" % name)
195260

196-
# datetime and timedelta
197-
elif is_timedelta_rhs and is_datetime_lhs:
261+
time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name)
198262

199-
if name not in ['__add__','__sub__']:
200-
raise TypeError("can only operate on a datetime with a rhs of a timedelta for "
201-
"addition and subtraction, but the operator [%s] was passed" % name)
202-
dtype = 'M8[ns]'
203-
lvalues = lvalues.view('i8')
204-
rvalues = rvalues.view('i8')
205-
206-
elif is_timedelta_lhs and is_datetime_rhs:
207-
208-
if name not in ['__add__']:
209-
raise TypeError("can only operate on a timedelta and a datetime for "
210-
"addition, but the operator [%s] was passed" % name)
211-
dtype = 'M8[ns]'
212-
lvalues = lvalues.view('i8')
213-
rvalues = rvalues.view('i8')
214-
215-
else:
216-
raise TypeError('cannot operate on a series with out a rhs '
217-
'of a series/ndarray of type datetime64[ns] '
218-
'or a timedelta')
219-
220-
# if we need to mask the results
221-
if mask is not None:
222-
if mask.any():
223-
def f(x):
224-
x = pa.array(x,dtype=dtype)
225-
np.putmask(x,mask,fill_value)
226-
return x
227-
wrap_results = f
263+
if time_converted is None:
264+
lvalues, rvalues = left, right
265+
dtype = None
266+
wrap_results = lambda x: x
267+
elif time_converted == NotImplemented:
268+
return NotImplemented
269+
else:
270+
lvalues = time_converted.lvalues
271+
rvalues = time_converted.rvalues
272+
dtype = time_converted.dtype
273+
wrap_results = time_converted.wrap_results
228274

229275
if isinstance(rvalues, Series):
230276

231-
if hasattr(lvalues,'values'):
232-
lvalues = lvalues.values
233-
if hasattr(rvalues,'values'):
234-
rvalues = rvalues.values
235-
236-
if self.index.equals(other.index):
237-
name = _maybe_match_name(self, other)
238-
return self._constructor(wrap_results(na_op(lvalues, rvalues)),
239-
index=self.index, dtype=dtype, name=name)
240-
241-
join_idx, lidx, ridx = self.index.join(other.index, how='outer',
277+
join_idx, lidx, ridx = left.index.join(rvalues.index, how='outer',
242278
return_indexers=True)
279+
rindex = rvalues.index
280+
name = _maybe_match_name(left, rvalues)
281+
lvalues = getattr(lvalues, 'values', lvalues)
282+
rvalues = getattr(rvalues, 'values', rvalues)
283+
if left.index.equals(rindex):
284+
index = left.index
285+
else:
286+
index = join_idx
243287

244-
if lidx is not None:
245-
lvalues = com.take_1d(lvalues, lidx)
288+
if lidx is not None:
289+
lvalues = com.take_1d(lvalues, lidx)
246290

247-
if ridx is not None:
248-
rvalues = com.take_1d(rvalues, ridx)
291+
if ridx is not None:
292+
rvalues = com.take_1d(rvalues, ridx)
249293

250294
arr = na_op(lvalues, rvalues)
251295

252-
name = _maybe_match_name(self, other)
253-
return self._constructor(wrap_results(arr), index=join_idx, name=name, dtype=dtype)
254-
elif isinstance(other, DataFrame):
296+
return left._constructor(wrap_results(arr), index=index,
297+
name=name, dtype=dtype)
298+
elif isinstance(right, DataFrame):
255299
return NotImplemented
256300
else:
257301
# scalars
258302
if hasattr(lvalues, 'values'):
259303
lvalues = lvalues.values
260-
return self._constructor(wrap_results(na_op(lvalues, rvalues)),
261-
index=self.index, name=self.name, dtype=dtype)
304+
return left._constructor(wrap_results(na_op(lvalues, rvalues)),
305+
index=left.index, name=left.name, dtype=dtype)
262306
return wrapper
263307

264308

0 commit comments

Comments
 (0)